Module preprocessor.spec
Classes
class Field (name: str, label: str, meta: FieldMeta, gen_type: GenType, data: FieldData, custom: Dict[int, Any], restricted: bool, is_masked: Optional[bool] = None, generator: Optional[Generator] = None)
-
Field(name: str, label: str, meta: preprocessor.spec.FieldMeta, gen_type: preprocessor.spec.GenType, data: preprocessor.spec.FieldData, custom: Dict[int, Any], restricted: bool, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)
Class variables
var custom : Dict[int, Any]
var data : FieldData
var gen_type : GenType
var generator : Optional[Generator]
var is_masked : Optional[bool]
var label : str
var meta : FieldMeta
var name : str
var restricted : bool
Static methods
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def from_field(self, from_field)
def generate_data(self, count)
def set_random(self)
def set_sample(self, df)
def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldData (random: List[Any], sample: List[Any])
-
FieldData(random: List[Any], sample: List[Any])
Class variables
var random : List[Any]
var sample : List[Any]
Static methods
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldMeta (type: SupportedType, range: Optional[Range], decimals: Optional[int], length: Optional[Range], has_inf: Optional[bool] = None, has_neg_inf: Optional[bool] = None, has_nan: Optional[bool] = None, word_mode: Optional[int] = None)
-
FieldMeta(type: preprocessor.spec.SupportedType, range: Optional[preprocessor.spec.Range], decimals: Optional[int], length: Optional[preprocessor.spec.Range], has_inf: Optional[bool] = None, has_neg_inf: Optional[bool] = None, has_nan: Optional[bool] = None, word_mode: Optional[int] = None)
Class variables
var decimals : Optional[int]
var has_inf : Optional[bool]
var has_nan : Optional[bool]
var has_neg_inf : Optional[bool]
var length : Optional[Range]
var range : Optional[Range]
var type : SupportedType
var word_mode : Optional[int]
Static methods
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldOverride (name: str, label: Optional[str] = None, gen_type: Optional[GenType] = None, custom: Optional[Dict[int, Any]] = None, restricted: Optional[bool] = None, is_masked: Optional[bool] = None, generator: Optional[Generator] = None)
-
FieldOverride(name: str, label: Optional[str] = None, gen_type: Optional[preprocessor.spec.GenType] = None, custom: Optional[Dict[int, Any]] = None, restricted: Optional[bool] = None, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)
Class variables
var custom : Optional[Dict[int, Any]]
var gen_type : Optional[GenType]
var generator : Optional[Generator]
var is_masked : Optional[bool]
var label : Optional[str]
var name : str
var restricted : Optional[bool]
Static methods
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class GenType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Ancestors
- enum.Enum
Class variables
var Random
var Sample
class Generator (type: str, params: Optional[Dict[str, Any]] = None)
-
Generator(type: str, params: Optional[Dict[str, Any]] = None)
Class variables
var params : Optional[Dict[str, Any]]
var type : str
Static methods
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Range (min: float, max: float)
-
Range(min: float, max: float)
Class variables
var max : float
var min : float
Static methods
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Methods
def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Spec (fields: List[Field])
-
Spec(fields: List[preprocessor.spec.Field])
Class variables
var fields : List[Field]
Static methods
def from_dataframe(df: pandas.core.frame.DataFrame, size: Optional[int] = None, custom: Optional[Dict] = None, is_masked: bool = True, unmask_columns: Optional[List[str]] = None) -> Spec
-
Generate a dataset .spec (a Spec object) from a dataframe.
Args
df
: pd.DataFrame
- The source dataframe.
size
: Optional[int], optional
- Number of records to include in the sample (mock) data. Default is 10.
custom
: Optional[Dict], optional
- Custom values to add to the spec. The dict must contain field names that match the columns in the dataframe; the values become the custom values for that field in the spec. Defaults to None.
is_masked
: bool, optional
- Whether all fields should be masked. Defaults to True.
unmask_columns
: Optional[List[str]], optional
- Individual fields to unmask, overriding is_masked=True. Defaults to None.
Raises
Exception
- "Unable to detect column type."
Exception
- "Unable to find column '{col}' from unmask_columns list."
Returns
Spec
- The Spec which describes the data from the dataframe.
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) -> ~A
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]
Instance variables
var unrestricted_fields : Iterator[Field]
Methods
def apply_override(self, overrides: List[FieldOverride])
def as_frame(self) -> pandas.core.frame.DataFrame
def to_dict(self, encode_json=False) -> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class SupportedType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
An enumeration.
Ancestors
- builtins.str
- enum.Enum
Class variables
var Bool
var Float64
var Int64
var String