Module preprocessor.spec
Classes
class Field (name: str, label: str, meta: FieldMeta, gen_type: GenType, data: FieldData, custom: Dict[int, Any], restricted: bool, is_masked: bool | None = None, generator: Generator | None = None)-
Field(name: str, label: str, meta: preprocessor.spec.FieldMeta, gen_type: preprocessor.spec.GenType, data: preprocessor.spec.FieldData, custom: Dict[int, Any], restricted: bool, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var custom : Dict[int, Any]
var data : FieldData
var gen_type : GenType
var generator : Generator | None
var is_masked : bool | None
var label : str
var meta : FieldMeta
var name : str
var restricted : bool
Methods
def from_field(self, from_field)
def generate_data(self, count)
def set_random(self)
def set_sample(self, df)
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldData (random: List[Any], sample: List[Any])-
FieldData(random: List[Any], sample: List[Any])
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var random : List[Any]
var sample : List[Any]
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldMeta (type: SupportedType, range: Range | None, decimals: int | None, length: Range | None, has_inf: bool | None = None, has_neg_inf: bool | None = None, has_nan: bool | None = None, word_mode: int | None = None)-
FieldMeta(type: preprocessor.spec.SupportedType, range: Optional[preprocessor.spec.Range], decimals: Optional[int], length: Optional[preprocessor.spec.Range], has_inf: Optional[bool] = None, has_neg_inf: Optional[bool] = None, has_nan: Optional[bool] = None, word_mode: Optional[int] = None)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var decimals : int | None
var has_inf : bool | None
var has_nan : bool | None
var has_neg_inf : bool | None
var length : Range | None
var range : Range | None
var type : SupportedType
var word_mode : int | None
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldOverride (name: str, label: str | None = None, gen_type: GenType | None = None, custom: Dict[int, Any] | None = None, restricted: bool | None = None, is_masked: bool | None = None, generator: Generator | None = None)-
FieldOverride(name: str, label: Optional[str] = None, gen_type: Optional[preprocessor.spec.GenType] = None, custom: Optional[Dict[int, Any]] = None, restricted: Optional[bool] = None, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var custom : Dict[int, Any] | None
var gen_type : GenType | None
var generator : Generator | None
var is_masked : bool | None
var label : str | None
var name : str
var restricted : bool | None
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class GenType (value, names=None, *, module=None, qualname=None, type=None, start=1)-
An enumeration.
Ancestors
- enum.Enum
Class variables
var Random
var Sample
class Generator (type: str, params: Dict[str, Any] | None = None)-
Generator(type: str, params: Optional[Dict[str, Any]] = None)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var params : Dict[str, Any] | None
var type : str
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Range (min: float, max: float)-
Range(min: float, max: float)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var max : float
var min : float
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Spec (fields: List[Field])-
Spec(fields: List[preprocessor.spec.Field])
Static methods
def from_dataframe(df: pandas.core.frame.DataFrame, size: int | None = None, custom: Dict | None = None, is_masked: bool = True, unmask_columns: List[str] | None = None) -> Spec-
Generate a dataset .spec from a dataframe
Args
df : pd.DataFrame - The source dataframe.
size : Optional[int], optional - Number of records to include in sample (mock) data. Default is 10.
custom : Optional[Dict], optional - Custom values to add to the spec. The dict must contain field names that match the columns in the dataframe; the values become the custom values for that field in the spec. Defaults to None.
is_masked : bool, optional - Should all fields be masked? Defaults to True.
unmask_columns : Optional[List[str]], optional - Individual fields to unmask, overriding is_masked=True. Defaults to None.
Raises
Exception - "Unable to detect column type."
Exception - "Unable to find column '{col}' from unmask_columns list."
Returns
Spec - The Spec which describes the data from the dataframe.
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
Methods
def apply_override(self, overrides: List[FieldOverride])
def as_frame(self) -> pandas.core.frame.DataFrame
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class SupportedType (value, names=None, *, module=None, qualname=None, type=None, start=1)-
An enumeration.
Ancestors
- builtins.str
- enum.Enum
Class variables
var Bool
var Float64
var Int64
var String