Source code for monitor_schema.models.analyzer.analyzer

"""Schema for analyses."""
from typing import Any, Dict, List, Optional, Union

from pydantic import BaseModel, Field, constr

from monitor_schema.models.commons import NoExtrasBaseModel

from ..commons import CronSchedule, FixedCadenceSchedule, Metadata
from ..utils import anyOf_to_oneOf, duration_field
from .algorithms import (
    ColumnListChangeConfig,
    ComparisonConfig,
    ConjunctionConfig,
    DiffConfig,
    DisjunctionConfig,
    DriftConfig,
    ExperimentalConfig,
    FixedThresholdsConfig,
    FrequentStringComparisonConfig,
    ListComparisonConfig,
    SeasonalConfig,
    StddevConfig,
)
from .targets import ColumnMatrix, DatasetMatrix


class Analyzer(NoExtrasBaseModel):
    """Configuration for running an analysis.

    An analysis targets a metric (note that a metric could be a complex object) for one or multiple fields in
    one or multiple segments. The output is a list of 'anomalies' that might show issues with data.

    ``id`` and ``config`` are declared without a default and are therefore required. Fields declared with
    ``Field(None, ...)`` are optional. The three duration fields are built by ``duration_field``, which is
    defined outside this module.

    The ``schedule``, ``targetMatrix`` and ``config`` unions are discriminated on their ``type`` attribute.
    """

    metadata: Optional[Metadata] = Field(
        None, description="WhyLabs-managed metadata. This is to track various metadata for auditing."
    )

    # pydantic applies ``regex`` with ``re.match`` (anchored at the start only) and the JSON Schema
    # ``pattern`` keyword is unanchored, so the explicit ``^...$`` is what enforces the "can only
    # contain" contract stated in the descriptions below.
    id: str = Field(
        description="A unique, human readable ID for an analyzer. "
        "Can only contain alpha numeric characters, underscores and dashes",
        min_length=10,
        max_length=128,
        regex=r"^[0-9a-zA-Z\-_]+$",
    )
    displayName: Optional[str] = Field(
        None,
        id="DisplayName",
        description="A display name for the analyzer if viewed through the WhyLabs UI. "
        "Can only contain dashes, underscores, spaces, and alphanumeric characters",
        min_length=10,
        max_length=256,
        regex=r"^[0-9a-zA-Z \-_]+$",
    )
    # Each tag is validated in full (anchors plus ``+``), not just its first character.
    tags: Optional[  # type: ignore
        List[constr(min_length=3, max_length=256, regex=r"^[0-9a-zA-Z\-_]+$")]  # noqa
    ] = Field(  # noqa F722
        None, description="A list of tags that are associated with the analyzer."
    )
    targetSize: Optional[int] = Field(
        None,
        description="By default analyzers compare a baseline to a single target bucket whose size aligns with "
        "the dataset granularity. For example a daily dataset will use targets with a size of "
        "one day. Some datasets with a lot of fluctuation can lead to noisy monitors. One "
        "approach to making analyzers less noisy in such a scenario is to increase the targetSize to "
        "average across more than a single bucket.",
    )
    schedule: Optional[Union[CronSchedule, FixedCadenceSchedule]] = Field(
        None,
        description="A schedule for running the analyzer. If not set, the analyzer's considered disabled",
        discriminator="type",
    )
    disabled: Optional[bool] = Field(
        None,
        description="Whether the analyzer is disabled. "
        "This allows user to keep the configuration "
        "around without having to delete the analyzer config",
    )
    disableTargetRollup: Optional[bool] = Field(
        None,
        description="For customers with individual profile storage enabled on their account (contact us), "
        "this allows a user to monitor individual profiles without rolling them up. "
        "When enabled, analysis will be timestamped 1:1 with the profile's dataset timestamp "
        "rather than being truncated to the dataset granularity. ",
    )
    targetMatrix: Optional[Union[ColumnMatrix, DatasetMatrix]] = Field(
        default=None,
        description="A matrix for possible locations of the target",
        discriminator="type",
    )
    dataReadinessDuration: Optional[str] = duration_field(
        title="DataReadinessDuration",
        description="ISO 8601 duration format. The duration determines how fast data is ready for the monitor. For "
        "example, if your pipeline takes 2 days to deliver profiles to WhyLabs, the value should be "
        "P2D. Note that this value will be used to evaluate missing data as well",
    )
    batchCoolDownPeriod: Optional[str] = duration_field(
        title="BatchCoolDownPeriod",
        description="ISO 8601 duration format. Specifies the duration that the monitor will wait from the last time "
        "a profile arrives. Any batch involved in the calculation must have received the last profile by "
        "the duration.",
    )
    backfillGracePeriodDuration: Optional[str] = duration_field(
        title="BackfillGracePeriodDuration",
        description="ISO 8601 duration format. How far back an analyzer will attempt to backfill late data. Note that "
        "we will only backfill batches not previously analyzed. If the batch was already analyzed, "
        "even with partial data, the backfill will ignore the new data unless you trigger an explicit "
        "backfill request. We support 48 hours for hourly data, 30 days for daily data, and 6 months for "
        "monthly data.",
    )
    config: Union[
        ConjunctionConfig,
        DisjunctionConfig,
        DiffConfig,
        ComparisonConfig,
        ListComparisonConfig,
        FrequentStringComparisonConfig,
        ColumnListChangeConfig,
        FixedThresholdsConfig,
        StddevConfig,
        DriftConfig,
        ExperimentalConfig,
        SeasonalConfig,
    ] = Field(description="The configuration map of the analyzer", discriminator="type")

    class Config:
        """Updates JSON schema anyOf to oneOf."""

        # noinspection PyUnusedLocal
        @staticmethod
        def schema_extra(schema: Dict[str, Any], model: BaseModel) -> None:
            """Update specific fields here (for Union type, specifically).

            Args:
                schema: The JSON schema generated for this model. It is handed to ``anyOf_to_oneOf``
                    and nothing is returned, so any change is presumably made in place.
                model: The model the schema was generated for; unused.
            """
            # Only the ``config`` and ``targetMatrix`` unions are rewritten here.
            # NOTE(review): ``schedule`` is also a Union but is not passed to the helper; confirm intentional.
            anyOf_to_oneOf(schema, "config")
            anyOf_to_oneOf(schema, "targetMatrix")