Module preprocessor.spec

Classes

class Field (name: str, label: str, meta: FieldMeta, gen_type: GenType, data: FieldData, custom: Dict[int, Any], restricted: bool, is_masked: bool | None = None, generator: Generator | None = None)

Field(name: str, label: str, meta: preprocessor.spec.FieldMeta, gen_type: preprocessor.spec.GenType, data: preprocessor.spec.FieldData, custom: Dict[int, Any], restricted: bool, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var custom : Dict[int, Any]
var data : FieldData
var gen_type : GenType
var generator : Generator | None
var is_masked : bool | None
var label : str
var meta : FieldMeta
var name : str
var restricted : bool

Methods

def from_field(self, from_field)
def generate_data(self, count)
def set_random(self)
def set_sample(self, df)
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldData (random: List[Any], sample: List[Any])

FieldData(random: List[Any], sample: List[Any])

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var random : List[Any]
var sample : List[Any]

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldMeta (type: SupportedType, range: Range | None, decimals: int | None, length: Range | None, has_inf: bool | None = None, has_neg_inf: bool | None = None, has_nan: bool | None = None, word_mode: int | None = None)

FieldMeta(type: preprocessor.spec.SupportedType, range: Optional[preprocessor.spec.Range], decimals: Optional[int], length: Optional[preprocessor.spec.Range], has_inf: Optional[bool] = None, has_neg_inf: Optional[bool] = None, has_nan: Optional[bool] = None, word_mode: Optional[int] = None)

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var decimals : int | None
var has_inf : bool | None
var has_nan : bool | None
var has_neg_inf : bool | None
var length : Range | None
var range : Range | None
var type : SupportedType
var word_mode : int | None

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class FieldOverride (name: str, label: str | None = None, gen_type: GenType | None = None, custom: Dict[int, Any] | None = None, restricted: bool | None = None, is_masked: bool | None = None, generator: Generator | None = None)

FieldOverride(name: str, label: Optional[str] = None, gen_type: Optional[preprocessor.spec.GenType] = None, custom: Optional[Dict[int, Any]] = None, restricted: Optional[bool] = None, is_masked: Optional[bool] = None, generator: Optional[preprocessor.spec.Generator] = None)

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var custom : Dict[int, Any] | None
var gen_type : GenType | None
var generator : Generator | None
var is_masked : bool | None
var label : str | None
var name : str
var restricted : bool | None

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class GenType (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enumeration.

Ancestors

  • enum.Enum

Class variables

var Random
var Sample
class Generator (type: str, params: Dict[str, Any] | None = None)

Generator(type: str, params: Optional[Dict[str, Any]] = None)

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var params : Dict[str, Any] | None
var type : str

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Range (min: float, max: float)

Range(min: float, max: float)

Static methods

def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var max : float
var min : float

Methods

def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class Spec (fields: List[Field])

Spec(fields: List[preprocessor.spec.Field])

Static methods

def from_dataframe(df: pandas.core.frame.DataFrame, size: int | None = None, custom: Dict | None = None, is_masked: bool = True, unmask_columns: List[str] | None = None) -> Spec

Generate a dataset .spec from a dataframe.

Args

df : pd.DataFrame
The source dataframe
size : Optional[int], optional
Number of records to include in sample (mock) data. Default is 10.
custom : Optional[Dict], optional
Custom values to add to the spec. Dict must contain fieldnames that match the columns in the dataframe, values become custom values for that field in the spec. Defaults to None.
is_masked : bool, optional
Should all fields be masked? Defaults to True.
unmask_columns : Optional[List[str]], optional
Individual fields to unmask, overriding is_masked=True. Defaults to None.

Raises

Exception
"Unable to detect column type."
Exception
"Unable to find column '{col}' from unmask_columns list."

Returns

Spec
The Spec which describes the data from the dataframe.
def from_dict(kvs: dict | list | str | int | float | bool | None, *, infer_missing=False) -> ~A
def from_json(s: str | bytes | bytearray, *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) -> ~A
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) -> dataclasses_json.mm.SchemaF[~A]

Instance variables

var fields : List[Field]
var unrestricted_fields : Iterator[Field]

Methods

def apply_override(self, overrides: List[FieldOverride])
def as_frame(self) -> pandas.core.frame.DataFrame
def to_dict(self, encode_json=False) -> Dict[str, dict | list | str | int | float | bool | None]
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: int | str | None = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) -> str
class SupportedType (value, names=None, *, module=None, qualname=None, type=None, start=1)

An enumeration.

Ancestors

  • builtins.str
  • enum.Enum

Class variables

var Bool
var Float64
var Int64
var String