# Source code for monitor_schema.cli

# type: ignore

"""Console script for monitor_schema."""
# flake8: noqa
import json
import uuid

import click
import yaml

from .models import (
    Analyzer,
    AnomalyFilter,
    BaselineType,
    Cadence,
    ColumnDataType,
    ColumnDiscreteness,
    ColumnMatrix,
    ColumnSchema,
    DatasetMatrix,
    DigestMode,
    Document,
    DriftConfig,
    EntitySchema,
    EveryAnomalyMode,
    FixedCadenceSchedule,
    GlobalAction,
    Granularity,
    Monitor,
    Segment,
    SendEmail,
    SlackWebhook,
    StddevConfig,
    TargetLevel,
    TrailingWindowBaseline,
)


@click.command()
def main() -> None:
    """Generates schema and example document JSON."""
    # The docstring above is kept verbatim on purpose: click shows a command's
    # docstring as its --help text.
    #
    # Two pairs of files are produced via _dump_json_yaml:
    #   * 'schema'  - the JSON schema of Document, plus '$id' / '$version' keys
    #   * 'example' - a sample Document with three analyzers and three monitors

    # Small local factories. Each call builds a fresh model instance, exactly
    # as the fully spelled-out constructor calls would.
    def analyzer_schedule():
        return FixedCadenceSchedule(type='fixed', cadence=Cadence.daily)

    def monitor_schedule():
        # Monitors pass the cadence as a plain string rather than the enum member.
        return FixedCadenceSchedule(type='fixed', cadence='daily')

    def trailing_baseline():
        return TrailingWindowBaseline(type=BaselineType.TrailingWindow, size=14)

    def untagged_segments():
        return [Segment(tags=[])]

    def hellinger_drift_analyzer(analyzer_id, column_group, metric):
        return Analyzer(
            id=analyzer_id,
            schedule=analyzer_schedule(),
            targetMatrix=ColumnMatrix(
                type=TargetLevel.column,
                include=[column_group],
                segments=untagged_segments(),
            ),
            config=DriftConfig(
                type='drift',
                metric=metric,
                algorithm='hellinger',
                threshold=0.5,
                baseline=trailing_baseline(),
            ),
        )

    def notification_actions(include_slack):
        actions = [
            GlobalAction(type='global', target='action-xyz'),
            SendEmail(type='email', target='demo@whylabs.ai'),
        ]
        if include_slack:
            actions.append(SlackWebhook(type='slack', target='https://demo.com'))
        return actions

    # --- Schema files -------------------------------------------------------
    schema_doc = json.loads(Document.schema_json(indent=2))
    schema_doc.update(
        {
            # NOTE(review): 'montor-schema' looks like a typo of 'monitor-schema';
            # confirm against the real repository URL before changing it.
            '$id': "https://gitlab.com/whylabs/core/montor-schema/-/blob/main/schema/schema.json",
            '$version': "1.0",
        }
    )
    _dump_json_yaml('schema', json.dumps(schema_doc, indent=2))

    # --- Analyzers for the example document ---------------------------------
    f1_analyzer = Analyzer(
        id='drift-analyzer',
        schedule=analyzer_schedule(),
        targetMatrix=DatasetMatrix(
            type=TargetLevel.dataset,
            segments=untagged_segments(),
        ),
        dataReadinessDuration="PT19H",
        backfillGracePeriodDuration="P7D",
        config=StddevConfig(
            type="stddev",
            metric='classification.f1',
            factor=5,
            baseline=trailing_baseline(),
        ),
    )
    histogram_analyzer = hellinger_drift_analyzer(
        'numerical_drift-analyzer', 'group:continuous', 'histogram'
    )
    frequent_items_analyzer = hellinger_drift_analyzer(
        'frequent_items_drift-analyzer', 'group:discrete', 'frequent_items'
    )

    # --- Example document ---------------------------------------------------
    document_id = uuid.UUID('{12345678-1234-5678-1234-567812345678}')

    entity_schema = EntitySchema(
        columns={
            'annual_inc': ColumnSchema(
                discreteness=ColumnDiscreteness.discrete,
                dataType=ColumnDataType.integral,
                classifier='input',
            ),
            'prediction': ColumnSchema(
                discreteness=ColumnDiscreteness.discrete,
                dataType=ColumnDataType.integral,
                classifier='output',
            ),
        },
    )

    digest_drift_monitor = Monitor(
        id='drift-monitor-1',
        analyzerIds=[histogram_analyzer.id, frequent_items_analyzer.id],
        schedule=monitor_schedule(),
        severity=4,
        mode=DigestMode(
            type='DIGEST',
            filter=AnomalyFilter(minAlertCount=20),
        ),
        actions=notification_actions(include_slack=True),
    )
    important_features_monitor = Monitor(
        id='drift-monitor-important-features-2',
        analyzerIds=[histogram_analyzer.id, frequent_items_analyzer.id],
        schedule=monitor_schedule(),
        severity=2,
        mode=EveryAnomalyMode(
            type='EVERY_ANOMALY',
            filter=AnomalyFilter(
                minWeight=0.5,
                minRankByWeight=10,
                excludeColumns=['very_noisy'],
            ),
        ),
        actions=notification_actions(include_slack=True),
    )
    f1_monitor = Monitor(
        id='f1-monitor-1',
        analyzerIds=[f1_analyzer.id],
        schedule=monitor_schedule(),
        severity=2,
        mode=EveryAnomalyMode(type='EVERY_ANOMALY'),
        actions=notification_actions(include_slack=False),
    )

    example_document = Document(
        id=document_id,
        orgId='org-1',
        schemaVersion=1,
        datasetId='model-1',
        granularity=Granularity.daily,
        entitySchema=entity_schema,
        analyzers=[histogram_analyzer, frequent_items_analyzer, f1_analyzer],
        monitors=[digest_drift_monitor, important_features_monitor, f1_monitor],
    )

    # exclude_none=True leaves unset optional fields out of the serialized example.
    _dump_json_yaml('example', example_document.json(indent=2, exclude_none=True))
    print("Done dumping")


def _dump_json_yaml(file_name: str, json_content: str) -> None:
    """Write a JSON document to ``<file_name>.json`` and ``<file_name>.yaml``.

    Args:
        file_name: Output path without extension; ``.json`` and ``.yaml`` are
            appended to it.
        json_content: The JSON document as text. It is written verbatim to the
            ``.json`` file and parsed with ``json.loads`` to build the YAML file.

    Raises:
        json.JSONDecodeError: If ``json_content`` is not valid JSON. The
            ``.json`` file has already been written at that point; the
            ``.yaml`` file is not created.
        OSError: If either output file cannot be opened or written.
    """
    # Explicit UTF-8: with allow_unicode=True PyYAML writes non-ASCII characters
    # unescaped, which fails (or is mis-encoded) when the platform's default
    # text encoding is not UTF-8.
    with open(f'{file_name}.json', 'wt', encoding='utf-8') as json_file:
        json_file.write(json_content)

    # Parse before opening the YAML file so invalid input does not leave an
    # empty .yaml file behind.
    parsed = json.loads(json_content)
    with open(f'{file_name}.yaml', 'wt', encoding='utf-8') as yaml_file:
        yaml.dump(parsed, yaml_file, allow_unicode=True)
# Run the click command only when this module is executed as a script.
if __name__ == "__main__":  # pragma: no cover
    main()