Module preprocessor.spec
Classes
class Field (name: str, label: str, meta: FieldMeta, gen_type: GenType, data: FieldData, custom: Dict[int, Any], restricted: bool, is_masked: bool | None = None, generator: Generator | None = None)
-
Field(name: str, label: str, meta: preprocessor.spec.FieldMeta, gen_type: preprocessor.spec.GenType, data: preprocessor.spec.FieldData, custom: Dict[int, Any], restricted: bool, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var custom : Dict[int, Any]
var data : FieldData
var gen_type : GenType
var generator : Generator | None
var is_masked : bool | None
var label : str
var meta : FieldMeta
var name : str
var restricted : bool
Methods
def from_field(self, from_field)
def generate_data(self, count)
def set_random(self)
def set_sample(self, df)
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldData (random: List[Any], sample: List[Any])
-
FieldData(random: List[Any], sample: List[Any])
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var random : List[Any]
var sample : List[Any]
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldMeta (type: SupportedType, range: Range | None, decimals: int | None, length: Range | None, has_inf: bool | None = None, has_neg_inf: bool | None = None, has_nan: bool | None = None, word_mode: int | None = None)
-
FieldMeta(type: preprocessor.spec.SupportedType, range: Optional[preprocessor.spec.Range], decimals: Optional[int], length: Optional[preprocessor.spec.Range], has_inf: Optional[bool] = None, has_neg_inf: Optional[bool] = None, has_nan: Optional[bool] = None, word_mode: Optional[int] = None)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var decimals : int | None
var has_inf : bool | None
var has_nan : bool | None
var has_neg_inf : bool | None
var length : Range | None
var range : Range | None
var type : SupportedType
var word_mode : int | None
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldOverride (name: str, label: str | None = None, gen_type: GenType | None = None, custom: Dict[int, Any] | None = None, restricted: bool | None = None, is_masked: bool | None = None, generator: Generator | None = None)
-
FieldOverride(name: str, label: Optional[str] = None, gen_type: Optional[preprocessor.spec.GenType] = None, custom: Optional[Dict[int, Any]] = None, restricted: Optional[bool] = None, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var custom : Dict[int, Any] | None
var gen_type : GenType | None
var generator : Generator | None
var is_masked : bool | None
var label : str | None
var name : str
var restricted : bool | None
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class GenType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Ancestors
- enum.Enum
Class variables
var Random
var Sample
class Generator (type: str, params: Dict[str, Any] | None = None)
-
Generator(type: str, params: Optional[Dict[str, Any]] = None)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var params : Dict[str, Any] | None
var type : str
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Range (min: float, max: float)
-
Range(min: float, max: float)
Static methods
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var max : float
var min : float
Methods
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Spec (fields: List[Field])
-
Spec(fields: List[preprocessor.spec.Field])
Static methods
def from_dataframe(df: pandas.core.frame.DataFrame, size: int | None = None, custom: Dict | None = None, is_masked: bool = True, unmask_columns: List[str] | None = None) -> Spec
-
Generate a dataset .spec from a dataframe
Args
df
:pd.DataFrame
- The source dataframe
size
:Optional[int]
, optional- Number of records to include in sample (mock) data. Default is 10.
custom
:Optional[Dict]
, optional- Custom values to add to the spec. Dict must contain fieldnames that match the columns in the dataframe, values become custom values for that field in the spec. Defaults to None.
is_masked
:bool
, optional- Should all fields be masked? Defaults to True.
unmask_columns
:Optional[List[str]]
, optional- Individual fields to unmask, overriding is_masked=True. Defaults to None.
Raises
Exception
- "Unable to detect column type."
Exception
- "Unable to find column '{col}' from unmask_columns list."
Returns
Spec
- The Spec which describes the data from the dataframe.
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
Methods
def apply_override(self, overrides: List[FieldOverride])
def as_frame(self) -> pandas.core.frame.DataFrame
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class SupportedType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Ancestors
- builtins.str
- enum.Enum
Class variables
var Bool
var Float64
var Int64
var String