Module preprocessor.spec
Classes
class Field (name: str, label: str, meta: FieldMeta, gen_type: GenType, data: FieldData, custom: Dict[int, Any], restricted: bool, is_masked: bool | None = None, generator: Generator | None = None)
-
Field(name: str, label: str, meta: preprocessor.spec.FieldMeta, gen_type: preprocessor.spec.GenType, data: preprocessor.spec.FieldData, custom: Dict[int, Any], restricted: bool, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)
Class variables
var custom : Dict[int, Any]
var data : FieldData
var gen_type : GenType
var generator : Generator | None
var is_masked : bool | None
var label : str
var meta : FieldMeta
var name : str
var restricted : bool
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def from_field(self, from_field)
def generate_data(self, count)
def set_random(self)
def set_sample(self, df)
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldData (random: List[Any], sample: List[Any])
-
FieldData(random: List[Any], sample: List[Any])
Class variables
var random : List[Any]
var sample : List[Any]
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldMeta (type: SupportedType, range: Range | None, decimals: int | None, length: Range | None, has_inf: bool | None = None, has_neg_inf: bool | None = None, has_nan: bool | None = None, word_mode: int | None = None)
-
FieldMeta(type: preprocessor.spec.SupportedType, range: Optional[preprocessor.spec.Range], decimals: Optional[int], length: Optional[preprocessor.spec.Range], has_inf: Optional[bool] = None, has_neg_inf: Optional[bool] = None, has_nan: Optional[bool] = None, word_mode: Optional[int] = None)
Class variables
var decimals : int | None
var has_inf : bool | None
var has_nan : bool | None
var has_neg_inf : bool | None
var length : Range | None
var range : Range | None
var type : SupportedType
var word_mode : int | None
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldOverride (name: str, label: str | None = None, gen_type: GenType | None = None, custom: Dict[int, Any] | None = None, restricted: bool | None = None, is_masked: bool | None = None, generator: Generator | None = None)
-
FieldOverride(name: str, label: Optional[str] = None, gen_type: Optional[preprocessor.spec.GenType] = None, custom: Optional[Dict[int, Any]] = None, restricted: Optional[bool] = None, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)
Class variables
var custom : Dict[int, Any] | None
var gen_type : GenType | None
var generator : Generator | None
var is_masked : bool | None
var label : str | None
var name : str
var restricted : bool | None
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class GenType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Ancestors
- enum.Enum
Class variables
var Random
var Sample
class Generator (type: str, params: Dict[str, Any] | None = None)
-
Generator(type: str, params: Optional[Dict[str, Any]] = None)
Class variables
var params : Dict[str, Any] | None
var type : str
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Range (min: float, max: float)
-
Range(min: float, max: float)
Class variables
var max : float
var min : float
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Spec (fields: List[Field])
-
Spec(fields: List[preprocessor.spec.Field])
Class variables
var fields : List[Field]
Static methods
def from_dataframe(df: pandas.core.frame.DataFrame, size: int | None = None, custom: Dict | None = None, is_masked: bool = True, unmask_columns: List[str] | None = None) -> Spec
-
Generate a dataset .spec from a dataframe
Args
df : pd.DataFrame
- The source dataframe.
size : Optional[int], optional
- Number of records to include in sample (mock) data. Default is 10.
custom : Optional[Dict], optional
- Custom values to add to the spec. The dict must be keyed by field names that match the columns in the dataframe; each value becomes the custom values for that field in the spec. Defaults to None.
is_masked : bool, optional
- Whether all fields should be masked. Defaults to True.
unmask_columns : Optional[List[str]], optional
- Individual fields to unmask, overriding is_masked=True. Defaults to None.
Raises
Exception
- "Unable to detect column type."
Exception
- "Unable to find column '{col}' from unmask_columns list."
Returns
Spec
- The Spec which describes the data from the dataframe.
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var unrestricted_fields : Iterator[Field]
Methods
def apply_override(self, overrides: List[FieldOverride])
def as_frame(self) -> pandas.core.frame.DataFrame
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class SupportedType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Ancestors
- builtins.str
- enum.Enum
Class variables
var Bool
var Float64
var Int64
var String