# Source code for monitor_schema.models.column_schema

"""Schema definitions."""
from enum import Enum
from typing import Dict, List, Optional

from pydantic import Field

from monitor_schema.models.commons import Metadata, NoExtrasBaseModel
from monitor_schema.models.segments import Segment
from monitor_schema.models.utils import COLUMN_NAME_TYPE


class ColumnDiscreteness(str, Enum):
    """Classifying the type."""

    # Members also subclass ``str``, so each one compares equal to its plain
    # string value (e.g. ``ColumnDiscreteness.discrete == "discrete"``).
    discrete = "discrete"
    continuous = "continuous"
class ColumnDataType(str, Enum):
    """Options for configuring data type for a column."""

    # Members also subclass ``str``, so each one compares equal to its plain
    # string value.
    integral = "integral"
    fractional = "fractional"
    # The member is named ``boolean`` but its value is "bool".
    boolean = "bool"
    string = "string"
    unknown = "unknown"
    null = "null"
class ColumnSchema(NoExtrasBaseModel):
    """Schema configuration for a column.

    Should be generated by WhyLabs originally but can be overridden by users.
    """

    # Adjacent string literals are concatenated verbatim, so every fragment
    # below carries its own separating space.

    # Discrete vs. continuous classification; no default is passed to Field.
    discreteness: ColumnDiscreteness = Field(
        description="Whether a column should be discrete or continuous. WhyLabs will detect this by default but "
        "users can override this value. Changing this columns will change the default grouping (discrete"
        " columns vs. continuous columns."
    )

    # Declared data type of the column; no default is passed to Field.
    dataType: ColumnDataType = Field(
        description="The data type of the columns. Setting this field affects the default grouping (i.e integral "
        "columns) and the frontend behavior (what default metrics to offer. It does NOT affect what is "
        "collected in whylogs (an integral field can still have a frequentItems sketch if it opts in)"
    )

    # Grouping label; defaults to 'input' and is capped at 20 characters.
    classifier: Optional[str] = Field(
        "input",
        description="We can classify these columns into various grouping. Currently we only support 'input' "
        "and 'output'",
        max_length=20,
    )
class WeightConfig(NoExtrasBaseModel):
    """Object that specifies column weights.

    - By default, the weight of a column is None (unspecified)
    - If the weight is unspecified, the column is EXCLUDED when you perform a filter/sort by weight
    - For sorting, unweighted column take the LEAST PRECEDENCE, meaning that weight column have higher priorities
    - They are not hierarchical: if a segment weight config is specified and a column does not have a weight in that
      config, we will not use any hierarchy to resolve the value. It will be None
    - Order of unweighted column is undefined.
    """

    # Mapping from column name to its weight; no default is passed to Field.
    weights: Dict[COLUMN_NAME_TYPE, float] = Field(description="Weights of the features")  # type: ignore
class SegmentWeightConfig(WeightConfig):
    """Object that specifies column weights for a segment."""

    # Optional segment these weights apply to; defaults to None.
    # Adjacent string literals are concatenated verbatim, so the first
    # fragment ends with the space that separates it from the second.
    segment: Optional[Segment] = Field(
        None,
        description="Set this value if the weights are for a specific segment. Note that an empty segment (no tags) "
        "is treated to be the overall segment in this case.",
    )
class EntitySchema(NoExtrasBaseModel):
    """Schema definition of an entity."""

    # Optional metadata block; defaults to None.
    metadata: Optional[Metadata] = Field(
        None,
        description="WhyLabs-managed metadata. This is to track various metadata for auditing.",
    )

    # Per-column schema keyed by column name; no default is passed to Field.
    columns: Dict[COLUMN_NAME_TYPE, ColumnSchema] = Field(  # type: ignore
        description="Schema configuration for the entity"
    )
class EntityWeights(NoExtrasBaseModel):
    """Entity weight configurations."""

    # Adjacent string literals are concatenated verbatim, so every fragment
    # below carries its own separating space.

    # Optional metadata block; defaults to None.
    metadata: Optional[Metadata] = Field(
        None,
        description="WhyLabs-managed metadata. This is to track various metadata for auditing.",
    )

    # Optional fallback weights; defaults to None.
    defaultWeights: Optional[WeightConfig] = Field(
        None,
        title="DefaultWeights",
        description="Optional. Default weights for ALL the segments that don't have an explicit weight specification. "
        "Note that if you specify the segment in this WeightConfig object, it is ignored.",
    )

    # Optional per-segment overrides; defaults to None, at most 1000 entries.
    segmentWeights: Optional[List[SegmentWeightConfig]] = Field(
        None,
        title="SegmentWeights",
        description="Optional. Segment-specific weights. Use this if you want to override the defaultWeights. Note that "
        "there might be a case where a segment fields are weighted without specifying the default weights",
        max_items=1000,
    )