Module preprocessor.spec

Classes

class Field (name: str, label: str, meta: FieldMeta, gen_type: GenType, data: FieldData, custom: Dict[int, Any], restricted: bool, is_masked: bool | None = None, generator: Generator | None = None)

Field(name: str, label: str, meta: preprocessor.spec.FieldMeta, gen_type: preprocessor.spec.GenType, data: preprocessor.spec.FieldData, custom: Dict[int, Any], restricted: bool, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)

Class variables

var custom : Dict[int, Any]
var data : FieldData
var gen_type : GenType
var generator : Generator | None
var is_masked : bool | None
var label : str
var meta : FieldMeta
var name : str
var restricted : bool

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Methods

def from_field(self, from_field)
def generate_data(self, count)
def set_random(self)
def set_sample(self, df)
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldData (random: List[Any], sample: List[Any])

FieldData(random: List[Any], sample: List[Any])

Class variables

var random : List[Any]
var sample : List[Any]

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldMeta (type: SupportedType, range: Range | None, decimals: int | None, length: Range | None, has_inf: bool | None = None, has_neg_inf: bool | None = None, has_nan: bool | None = None, word_mode: int | None = None)

FieldMeta(type: preprocessor.spec.SupportedType, range: Optional[preprocessor.spec.Range], decimals: Optional[int], length: Optional[preprocessor.spec.Range], has_inf: Optional[bool] = None, has_neg_inf: Optional[bool] = None, has_nan: Optional[bool] = None, word_mode: Optional[int] = None)

Class variables

var decimals : int | None
var has_inf : bool | None
var has_nan : bool | None
var has_neg_inf : bool | None
var length : Range | None
var range : Range | None
var type : SupportedType
var word_mode : int | None

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldOverride (name: str, label: str | None = None, gen_type: GenType | None = None, custom: Dict[int, Any] | None = None, restricted: bool | None = None, is_masked: bool | None = None, generator: Generator | None = None)

FieldOverride(name: str, label: Optional[str] = None, gen_type: Optional[preprocessor.spec.GenType] = None, custom: Optional[Dict[int, Any]] = None, restricted: Optional[bool] = None, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)

Class variables

var custom : Dict[int, Any] | None
var gen_type : GenType | None
var generator : Generator | None
var is_masked : bool | None
var label : str | None
var name : str
var restricted : bool | None

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class GenType (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enumeration.

Ancestors

  • enum.Enum

Class variables

var Random
var Sample
class Generator (type: str, params: Dict[str, Any] | None = None)

Generator(type: str, params: Optional[Dict[str, Any]] = None)

Class variables

var params : Dict[str, Any] | None
var type : str

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Range (min: float, max: float)

Range(min: float, max: float)

Class variables

var max : float
var min : float

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Spec (fields: List[Field])

Spec(fields: List[preprocessor.spec.Field])

Class variables

var fields : List[Field]

Static methods

def from_dataframe(df: pandas.core.frame.DataFrame, size: int | None = None, custom: Dict | None = None, is_masked: bool = True, unmask_columns: List[str] | None = None) -> Spec

Generate a dataset .spec from a dataframe

Args

df : pd.DataFrame
The source dataframe
size : Optional[int], optional
Number of records to include in sample (mock) data. Defaults to None, in which case 10 records are used.
custom : Optional[Dict], optional
Custom values to add to the spec. Dict must contain fieldnames that match the columns in the dataframe, values become custom values for that field in the spec. Defaults to None.
is_masked : bool, optional
Should all fields be masked? Defaults to True.
unmask_columns : Optional[List[str]], optional
Individual fields to unmask, overriding is_masked=True. Defaults to None.

Raises

Exception
"Unable to detect column type."
Exception
"Unable to find column '{col}' from unmask_columns list."

Returns

Spec
The Spec which describes the data from the dataframe.
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var unrestricted_fields : Iterator[Field]

Methods

def apply_override(self, overrides: List[FieldOverride])
def as_frame(self) -> pandas.core.frame.DataFrame
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class SupportedType (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enumeration.

Ancestors

  • builtins.str
  • enum.Enum

Class variables

var Bool
var Float64
var Int64
var String