"""Schema definitions."""
from enum import Enum
from typing import Dict, List, Optional
from pydantic import Field
from monitor_schema.models.commons import Metadata, NoExtrasBaseModel
from monitor_schema.models.segments import Segment
from monitor_schema.models.utils import COLUMN_NAME_TYPE
class ColumnDiscreteness(str, Enum):
    """Classifying the type."""

    # Mixing in ``str`` makes every member compare equal to its string value.
    discrete = 'discrete'
    continuous = 'continuous'
class ColumnDataType(str, Enum):
    """Options for configuring data type for a column."""

    # Mixing in ``str`` makes every member compare equal to its string value.
    integral = 'integral'
    fractional = 'fractional'
    # The member is named ``boolean`` but its value is the shorter 'bool'.
    boolean = 'bool'
    string = 'string'
    unknown = 'unknown'
    null = 'null'
class ColumnSchema(NoExtrasBaseModel):
    """Schema configuration for a column.

    Should be generated by WhyLabs originally but can be overridden by users.
    """

    # Discrete/continuous classification of the column; no default is given.
    # The description fragments are joined by implicit string concatenation,
    # so each seam must carry its own separating space.
    discreteness: ColumnDiscreteness = Field(
        description="Whether a column should be discrete or continuous. WhyLabs will detect this by default but "
        "users can override this value. Changing this columns will change the default grouping (discrete"
        " columns vs. continuous columns."
    )
    # Data type of the column; no default is given.
    dataType: ColumnDataType = Field(
        description='The data type of the columns. Setting this field affects the default grouping (i.e integral '
        'columns) and the frontend behavior (what default metrics to offer. It does NOT affect what is '
        'collected in whylogs (an integral field can still have a frequentItems sketch if it opts in)'
    )
    # Free-form grouping label, defaulting to 'input' and capped at 20 characters.
    classifier: Optional[str] = Field(
        'input',
        description="We can classify these columns into various grouping. Currently we only support 'input' "
        "and 'output'",
        max_length=20,
    )
class WeightConfig(NoExtrasBaseModel):
    """Object that specifies column weights.

    - By default, the weight of a column is None (unspecified)
    - If the weight is unspecified, the column is EXCLUDED when you perform a filter/sort by weight
    - For sorting, unweighted column take the LEAST PRECEDENCE, meaning that weight column have higher priorities
    - They are not hierarchical: if a segment weight config is specified and a column does not have a weight in that
    config, we will not use any hierarchy to resolve the value. It will be None
    - Order of unweighted column is undefined.
    """

    # Mapping from column name to its float weight; no default is given.
    weights: Dict[COLUMN_NAME_TYPE, float] = Field(description="Weights of the features")  # type: ignore
class SegmentWeightConfig(WeightConfig):
    """Object that specifies column weights for a segment."""

    # Optional segment these weights apply to; defaults to None.
    # The description fragments are joined by implicit string concatenation,
    # so the seam must carry its own separating space.
    segment: Optional[Segment] = Field(
        None,
        description="Set this value if the weights are for a specific segment. Note that an empty segment (no tags) "
        "is treated to be the overall segment in this case.",
    )
class EntitySchema(NoExtrasBaseModel):
    """Schema definition of an entity."""

    # Optional auditing metadata; defaults to None.
    metadata: Optional[Metadata] = Field(
        None, description="WhyLabs-managed metadata. This is to track various metadata for auditing."
    )
    # Mapping from column name to that column's schema; no default is given.
    columns: Dict[COLUMN_NAME_TYPE, ColumnSchema] = Field(  # type: ignore
        description="Schema configuration for the entity"
    )
class EntityWeights(NoExtrasBaseModel):
    """Entity weight configurations."""

    # Optional auditing metadata; defaults to None.
    metadata: Optional[Metadata] = Field(
        None, description="WhyLabs-managed metadata. This is to track various metadata for auditing."
    )
    # Optional fallback weights; defaults to None.
    # The description fragments are joined by implicit string concatenation,
    # so each seam must carry its own separating space.
    defaultWeights: Optional[WeightConfig] = Field(
        None,
        title="DefaultWeights",
        description="Optional. Default weights for ALL the segments that don't have an explicit weight specification. "
        "Note that if you specify the segment in this WeightConfig object, it is ignored.",
    )
    # Optional per-segment weight configurations, limited to 1000 entries; defaults to None.
    segmentWeights: Optional[List[SegmentWeightConfig]] = Field(
        None,
        title="SegmentWeights",
        description="Optional. Segment-specific weights. Use this if you want to override the defaultWeights. Note that "
        "there might be a case where a segment fields are weighted without specifying the default weights",
        max_items=1000,
    )