Module `preprocessor.spec`

Classes

class Field (name: str, label: str, meta: FieldMeta, gen_type: GenType, data: FieldData, custom: Dict[int, Any], restricted: bool, is_masked: bool | None = None, generator: Generator | None = None)

Field(name: str, label: str, meta: preprocessor.spec.FieldMeta, gen_type: preprocessor.spec.GenType, data: preprocessor.spec.FieldData, custom: Dict[int, Any], restricted: bool, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var custom : Dict[int, Any]
var data : FieldData
var gen_type : GenType
var generator : Generator | None
var is_masked : bool | None
var label : str
var meta : FieldMeta
var name : str
var restricted : bool

Methods

def from_field(self, from_field)
def generate_data(self, count)
def set_random(self)
def set_sample(self, df)
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str

class FieldData (random: List[Any], sample: List[Any])

FieldData(random: List[Any], sample: List[Any])

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var random : List[Any]
var sample : List[Any]

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str

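class FieldMeta (type: SupportedType, range: Range | None, decimals: int | None, length: Range | None, has_inf: bool | None = None, has_neg_inf: bool | None = None, has_nan: bool | None = None, word_mode: int | None = None)

FieldMeta(type: preprocessor.spec.SupportedType, range: Optional[preprocessor.spec.Range], decimals: Optional[int], length: Optional[preprocessor.spec.Range], has_inf: Optional[bool] = None, has_neg_inf: Optional[bool] = None, has_nan: Optional[bool] = None, word_mode: Optional[int] = None)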

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var decimals : int | None
var has_inf : bool | None
var has_nan : bool | None
var has_neg_inf : bool | None
var length : Range | None
var range : Range | None
var type : SupportedType
var word_mode : int | None

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str

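class FieldOverride (name: str, label: str | None = None, gen_type: GenType | None = None, custom: Dict[int, Any] | None = None, restricted: bool | None = None, is_masked: bool | None = None, generator: Generator | None = None)

FieldOverride(name: str, label: Optional[str] = None, gen_type: Optional[preprocessor.spec.GenType] = None, custom: Optional[Dict[int, Any]] = None, restricted: Optional[bool] = None, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)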

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var custom : Dict[int, Any] | None
var gen_type : GenType | None
var generator : Generator | None
var is_masked : bool | None
var label : str | None
var name : str
var restricted : bool | None

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str

class GenType (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enumeration.

Ancestors

enum.Enum

Class variables

var Random
var Sample

class Generator (type: str, params: Dict[str, Any] | None = None)

Generator(type: str, params: Optional[Dict[str, Any]] = None)

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var params : Dict[str, Any] | None
var type : str

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str

class Range (min: float, max: float)

Range(min: float, max: float)

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var max : float
var min : float

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str

class Spec (fields: List[Field])

Spec(fields: List[preprocessor.spec.Field])

Static methods

def from_dataframe(df: pandas.core.frame.DataFrame, size: int | None = None, custom: Dict | None = None, is_masked: bool = True, unmask_columns: List[str] | None = None) -> Spec

Generate a dataset .spec from a pandas DataFrame.

Args

df : pd.DataFrame: The source dataframe
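size : Optional[int], optional: Number of records to include in sample (mock) data. Defaults to None in the signature; the documented default of 10 is presumably applied when size is None (confirm against the implementation).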
custom : Optional[Dict], optional: Custom values to add to the spec. Dict must contain fieldnames that match the columns in the dataframe, values become custom values for that field in the spec. Defaults to None.
is_masked : bool, optional: Whether all fields should be masked. Defaults to True.
unmask_columns : Optional[List[str]], optional: Individual fields to unmask, overriding is_masked=True. Defaults to None.

Raises

Exception: "Unable to detect column type."
Exception: "Unable to find column '{col}' from unmask_columns list."

Returns

Spec: The Spec which describes the data from the dataframe.

def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A

def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var fields : List[Field]
var unrestricted_fields : Iterator[Field]

Methods

def apply_override(self, overrides: List[FieldOverride])
def as_frame(self) -> pandas.core.frame.DataFrame
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str

class SupportedType (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enumeration.

Ancestors

builtins.str
enum.Enum

Class variables

var Bool
var Float64
var Int64
var String