monitor_schema.models.Analyzer#

class monitor_schema.models.Analyzer[source]#

Configuration for running an analysis.

An analysis targets a metric (note that a metric could be a complex object) for one or multiple fields in one or multiple segments. The output is a list of ‘anomalies’ that might show issues with data.

Show JSON schema
{
   "title": "Analyzer",
   "description": "Configuration for running an analysis.\n\nAn analysis targets a metric (note that a metric could be a complex object) for one or multiple fields in\none or multiple segments. The output is a list of 'anomalies' that might show issues with data.",
   "type": "object",
   "properties": {
      "metadata": {
         "title": "Metadata",
         "description": "WhyLabs-managed metadata. This is to track various metadata for auditing.",
         "allOf": [
            {
               "$ref": "#/definitions/Metadata"
            }
         ]
      },
      "id": {
         "title": "Id",
         "description": "A unique, human readable ID for an analyzer. Can only contain alpha numeric characters, underscores and dashes",
         "maxLength": 128,
         "minLength": 10,
         "pattern": "[0-9a-zA-Z\\-_]+",
         "type": "string"
      },
      "displayName": {
         "title": "Displayname",
         "description": "A display name for the analyzer if view through WhyLabs UI. Can only contain dashes, underscores,spaces, and alphanumeric characters",
         "maxLength": 256,
         "minLength": 10,
         "pattern": "[0-9a-zA-Z \\-_]+",
         "id": "DisplayName",
         "type": "string"
      },
      "tags": {
         "title": "Tags",
         "description": "A list of tags that are associated with the analyzer.",
         "type": "array",
         "items": {
            "type": "string",
            "minLength": 3,
            "maxLength": 32,
            "pattern": "[0-9a-zA-Z\\-_]"
         }
      },
      "schedule": {
         "title": "Schedule",
         "description": "A schedule for running the analyzer. If not set, the analyzer's considered disabled",
         "discriminator": {
            "propertyName": "type",
            "mapping": {
               "cron": "#/definitions/CronSchedule",
               "fixed": "#/definitions/FixedCadenceSchedule"
            }
         },
         "oneOf": [
            {
               "$ref": "#/definitions/CronSchedule"
            },
            {
               "$ref": "#/definitions/FixedCadenceSchedule"
            }
         ]
      },
      "disabled": {
         "title": "Disabled",
         "description": "Whether the analyzer is disabled. This allows user to keep the configurationaround without having to delete the analyzer config",
         "type": "boolean"
      },
      "targetMatrix": {
         "title": "Targetmatrix",
         "description": "A matrix for possible locations of the target",
         "discriminator": {
            "propertyName": "type",
            "mapping": {
               "column": "#/definitions/ColumnMatrix",
               "dataset": "#/definitions/DatasetMatrix"
            }
         },
         "oneOf": [
            {
               "$ref": "#/definitions/ColumnMatrix"
            },
            {
               "$ref": "#/definitions/DatasetMatrix"
            }
         ]
      },
      "dataReadinessDuration": {
         "title": "DataReadinessDuration",
         "description": "ISO 8610 duration format. The duration determines how fast data is ready for the monitor. For example, if your pipeline takes 2 days to deliver profiles to WhyLabs, the value should beP2D. Note that this value will be used to evaluate missing data as well",
         "pattern": "^P(?!$)(\\d+M)?(\\d+W)?(\\d+D)?(T(?=\\d+[HM])(\\d+H)?(\\d+M)?)?$",
         "example": "PT1H, P1D",
         "type": "string"
      },
      "batchCoolDownPeriod": {
         "title": "BatchCoolDownPeriod",
         "description": "ISO 8610 duration format. Specifies the duration that the monitor will wait from the last timea profile arrives Any batch involved in the calculation must have received the last profile by the duration.",
         "pattern": "^P(?!$)(\\d+M)?(\\d+W)?(\\d+D)?(T(?=\\d+[HM])(\\d+H)?(\\d+M)?)?$",
         "example": "PT1H, P1D",
         "type": "string"
      },
      "backfillGracePeriodDuration": {
         "title": "BackfillGracePeriodDuration",
         "description": "ISO 8610 duration format. How far back an analyzer will attempt to backfill late data. Note that we will only backfill batches not previously analyzed. If the batch was already analyzed, even with partial data, the backfill will ignore the new data unless you trigger an explicit backfill request. We support 48 hours for hourly data, 30 days for daily data, and 6 months for monthly data.",
         "pattern": "^P(?!$)(\\d+M)?(\\d+W)?(\\d+D)?(T(?=\\d+[HM])(\\d+H)?(\\d+M)?)?$",
         "example": "PT1H, P1D",
         "type": "string"
      },
      "config": {
         "title": "Config",
         "description": "The configuration map of the analyzer",
         "discriminator": {
            "propertyName": "type",
            "mapping": {
               "diff": "#/definitions/DiffConfig",
               "comparison": "#/definitions/ComparisonConfig",
               "list_comparison": "#/definitions/ListComparisonConfig",
               "column_list": "#/definitions/ColumnListChangeConfig",
               "fixed": "#/definitions/FixedThresholdsConfig",
               "stddev": "#/definitions/StddevConfig",
               "drift": "#/definitions/DriftConfig",
               "experimental": "#/definitions/ExperimentalConfig",
               "seasonal": "#/definitions/SeasonalConfig"
            }
         },
         "oneOf": [
            {
               "$ref": "#/definitions/DiffConfig"
            },
            {
               "$ref": "#/definitions/ComparisonConfig"
            },
            {
               "$ref": "#/definitions/ListComparisonConfig"
            },
            {
               "$ref": "#/definitions/ColumnListChangeConfig"
            },
            {
               "$ref": "#/definitions/FixedThresholdsConfig"
            },
            {
               "$ref": "#/definitions/StddevConfig"
            },
            {
               "$ref": "#/definitions/DriftConfig"
            },
            {
               "$ref": "#/definitions/ExperimentalConfig"
            },
            {
               "$ref": "#/definitions/SeasonalConfig"
            }
         ]
      }
   },
   "required": [
      "targetMatrix",
      "config"
   ],
   "additionalProperties": false,
   "definitions": {
      "Metadata": {
         "title": "Metadata",
         "description": "Metadata for a top-level objects such as monitors, analyzers, and schema.\n\nThis object is managed by WhyLabs. Any user-provided values will be ignored on WhyLabs side.",
         "type": "object",
         "properties": {
            "version": {
               "title": "Version",
               "description": "A monotonically increasing numer that indicates the version of the object.",
               "type": "integer"
            },
            "schemaVersion": {
               "title": "Schemaversion",
               "description": "The version of the schema. Currently the accepted value is 1.",
               "minimum": 1,
               "maximum": 1,
               "type": "integer"
            },
            "updatedTimestamp": {
               "title": "Updatedtimestamp",
               "description": "Last updated timestamp",
               "exclusiveMinimum": 0,
               "type": "integer"
            },
            "author": {
               "title": "Author",
               "description": "The author of the change. It can be an API Key ID, a user ID, or a WhyLabs system ID.",
               "maxLength": 100,
               "pattern": "[0-9a-zA-Z-_.+]+",
               "type": "string"
            },
            "description": {
               "title": "Description",
               "description": "A description of the object",
               "maxLength": 1000,
               "type": "string"
            }
         },
         "required": [
            "version",
            "updatedTimestamp",
            "author"
         ],
         "additionalProperties": false
      },
      "TimeRange": {
         "title": "TimeRange",
         "description": "Support for a specific time range.",
         "type": "object",
         "properties": {
            "start": {
               "title": "Start",
               "description": "Inclusive. Start time of a time range.",
               "type": "string",
               "format": "date-time"
            },
            "end": {
               "title": "End",
               "description": "Exclusive. End time of a time range.",
               "type": "string",
               "format": "date-time"
            }
         },
         "required": [
            "start",
            "end"
         ],
         "additionalProperties": false
      },
      "CronSchedule": {
         "title": "CronSchedule",
         "description": "Support for scheduling.",
         "type": "object",
         "properties": {
            "type": {
               "title": "Type",
               "enum": [
                  "cron"
               ],
               "type": "string"
            },
            "cron": {
               "title": "Cron",
               "description": "Cron expression",
               "pattern": "(@(annually|yearly|monthly|weekly|daily|hourly))|((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*) ?){5,7})",
               "type": "string"
            },
            "exclusionRanges": {
               "title": "ExclusionRanges",
               "description": "The ranges of dates during which this Analyzer is NOT run.",
               "type": "array",
               "items": {
                  "$ref": "#/definitions/TimeRange"
               }
            }
         },
         "required": [
            "type",
            "cron"
         ],
         "additionalProperties": false
      },
      "FixedCadenceSchedule": {
         "title": "FixedCadenceSchedule",
         "description": "Support for scheduling based on a predefined cadence.",
         "type": "object",
         "properties": {
            "type": {
               "title": "Type",
               "enum": [
                  "fixed"
               ],
               "type": "string"
            },
            "cadence": {
               "title": "Cadence",
               "description": "Frequency to run the analyzer or monitor, based on UTC time. The monitor will run at the start of the cadence with some SLA depending on the customer tiers.",
               "enum": [
                  "hourly",
                  "daily",
                  "weekly",
                  "monthly"
               ],
               "type": "string"
            },
            "exclusionRanges": {
               "title": "ExclusionRanges",
               "description": "Ranges of dates during which this Analyzer is NOT run.",
               "type": "array",
               "items": {
                  "$ref": "#/definitions/TimeRange"
               }
            }
         },
         "required": [
            "type",
            "cadence"
         ],
         "additionalProperties": false
      },
      "SegmentTag": {
         "title": "SegmentTag",
         "description": "A single tag key value pair for a segment.",
         "type": "object",
         "properties": {
            "key": {
               "title": "Key",
               "maxLength": 1000,
               "type": "string"
            },
            "value": {
               "title": "Value",
               "maxLength": 1000,
               "type": "string"
            }
         },
         "required": [
            "key",
            "value"
         ],
         "additionalProperties": false
      },
      "Segment": {
         "title": "Segment",
         "description": "A segment is a list of tags.\n\nWe normalize these in the backend.",
         "type": "object",
         "properties": {
            "tags": {
               "title": "Tags",
               "description": "List of tags that define the specific segment",
               "maxItems": 10,
               "type": "array",
               "items": {
                  "$ref": "#/definitions/SegmentTag"
               }
            }
         },
         "required": [
            "tags"
         ],
         "additionalProperties": false
      },
      "ColumnGroups": {
         "title": "ColumnGroups",
         "description": "Standard column groupings.",
         "enum": [
            "group:continuous",
            "group:discrete",
            "group:input",
            "group:output",
            "group:bool",
            "group:int",
            "group:frac",
            "group:str"
         ],
         "type": "string"
      },
      "ColumnMatrix": {
         "title": "ColumnMatrix",
         "description": "Define the matrix of columns and segments to fan out for monitoring.",
         "type": "object",
         "properties": {
            "segments": {
               "title": "Segments",
               "description": "List of targeted segments. If not set, default to the overall segment",
               "maxItems": 1000,
               "type": "array",
               "items": {
                  "$ref": "#/definitions/Segment"
               }
            },
            "type": {
               "title": "Type",
               "enum": [
                  "column"
               ],
               "type": "string"
            },
            "include": {
               "title": "Include",
               "description": "List of allowed fields/features/columns. Could be a grouping as well.",
               "maxItems": 1000,
               "type": "array",
               "items": {
                  "anyOf": [
                     {
                        "$ref": "#/definitions/ColumnGroups"
                     },
                     {
                        "type": "string",
                        "maxLength": 1000
                     }
                  ]
               }
            },
            "exclude": {
               "title": "Exclude",
               "description": "List of blocked fields/features/columns. Could be a grouping as well. This setting is evaluated AFTER the 'include' field and thus should be used with caution.",
               "maxItems": 1000,
               "type": "array",
               "items": {
                  "anyOf": [
                     {
                        "$ref": "#/definitions/ColumnGroups"
                     },
                     {
                        "type": "string",
                        "maxLength": 1000
                     }
                  ]
               }
            }
         },
         "required": [
            "type"
         ],
         "additionalProperties": false
      },
      "DatasetMatrix": {
         "title": "DatasetMatrix",
         "description": "Define the matrix of fields and segments to fan out for monitoring.\n\n.",
         "type": "object",
         "properties": {
            "segments": {
               "title": "Segments",
               "description": "List of targeted segments. If not set, default to the overall segment",
               "maxItems": 1000,
               "type": "array",
               "items": {
                  "$ref": "#/definitions/Segment"
               }
            },
            "type": {
               "title": "Type",
               "description": "Must be 'dataset' level",
               "default": "dataset",
               "enum": [
                  "dataset"
               ],
               "type": "string"
            }
         },
         "additionalProperties": false
      },
      "DatasetMetric": {
         "title": "DatasetMetric",
         "description": "Metrics that are applicable at the dataset level.",
         "enum": [
            "profile.count",
            "profile.last_ingestion_time",
            "profile.first_ingestion_time",
            "column_row_count_sum",
            "shape_column_count",
            "shape_row_count",
            "input.count",
            "output.count",
            "classification.f1",
            "classification.precision",
            "classification.recall",
            "classification.accuracy",
            "classification.auc",
            "regression.mse",
            "regression.mae",
            "regression.rmse"
         ],
         "type": "string"
      },
      "SimpleColumnMetric": {
         "title": "SimpleColumnMetric",
         "description": "Simple column metrics that are basically just a single number.",
         "enum": [
            "count",
            "median",
            "max",
            "min",
            "mean",
            "stddev",
            "variance",
            "unique_upper",
            "unique_upper_ratio",
            "unique_est",
            "unique_est_ratio",
            "unique_lower",
            "unique_lower_ratio",
            "count_bool",
            "count_bool_ratio",
            "count_integral",
            "count_integral_ratio",
            "count_fractional",
            "count_fractional_ratio",
            "count_string",
            "count_string_ratio",
            "count_null",
            "count_null_ratio",
            "inferred_data_type",
            "quantile_5",
            "quantile_75",
            "quantile_25",
            "quantile_90",
            "quantile_95",
            "quantile_99"
         ],
         "type": "string"
      },
      "DiffMode": {
         "title": "DiffMode",
         "description": "Whether to use the absolute difference or the percentage to calculate the difference.",
         "enum": [
            "abs",
            "pct"
         ],
         "type": "string"
      },
      "ThresholdType": {
         "title": "ThresholdType",
         "description": "Threshold Type declaring the upper and lower bound.\n\n    By default an anomaly will be generated when the target is above or below the baseline\n    by the specified threshold.\n\n    If its only desirable to alert when the target is above the\n    baseline and not the other way around, specify upper for your ThresholdType.\n    ",
         "enum": [
            "lower",
            "upper"
         ],
         "type": "string"
      },
      "TrailingWindowBaseline": {
         "title": "TrailingWindowBaseline",
         "description": "A dynamic trailing window.\n\nThis is useful if you don't have a static baseline to monitor against. This is the default mode for most\nmonitors.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of an dataset. This is specific to WhyLabs. If the dataset ID does not exist, user will get a validation exception when saving the config with WhyLabs API",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "inheritSegment": {
               "title": "InheritSegment",
               "description": "Default to false. Whether to use the segment from the target to filter down the baseline",
               "type": "boolean"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "TrailingWindow"
               ],
               "type": "string"
            },
            "size": {
               "title": "Size",
               "description": "Window size",
               "exclusiveMinimum": 3,
               "maximum": 90,
               "type": "integer"
            },
            "offset": {
               "title": "Offset",
               "description": "Offset from the current batch for the range of the trailing window. Default to 1 (the previous batch). This means that if set this to 0, the baseline will include the current batch's value, orif we set it o 7, then the window is off by 7.",
               "type": "integer"
            },
            "exclusionRanges": {
               "title": "ExclusionRanges",
               "description": "The list of exclusion ranges",
               "maxItems": 100,
               "type": "array",
               "items": {
                  "$ref": "#/definitions/TimeRange"
               }
            }
         },
         "required": [
            "type",
            "size"
         ],
         "additionalProperties": false
      },
      "ReferenceProfileId": {
         "title": "ReferenceProfileId",
         "description": "A baseline based on a static reference profile.\n\nA typical use case is to use a \"gold\" dataset and upload its profile to WhyLabs. This can be a training dataset\nas well for an ML model.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of an dataset. This is specific to WhyLabs. If the dataset ID does not exist, user will get a validation exception when saving the config with WhyLabs API",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "Reference"
               ],
               "type": "string"
            },
            "profileId": {
               "title": "ProfileId",
               "description": "The unique profile ID for the reference profile",
               "maxLength": 100,
               "type": "string"
            }
         },
         "required": [
            "type",
            "profileId"
         ],
         "additionalProperties": false
      },
      "TimeRangeBaseline": {
         "title": "TimeRangeBaseline",
         "description": "A static time range.\n\nInstead of using a single profile or a trailing window, user can lock in a \"good\" period.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of an dataset. This is specific to WhyLabs. If the dataset ID does not exist, user will get a validation exception when saving the config with WhyLabs API",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "inheritSegment": {
               "title": "InheritSegment",
               "description": "Default to false. Whether to use the segment from the target to filter down the baseline",
               "type": "boolean"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "TimeRange"
               ],
               "type": "string"
            },
            "range": {
               "title": "Range",
               "description": "The range to set the time range with",
               "allOf": [
                  {
                     "$ref": "#/definitions/TimeRange"
                  }
               ]
            }
         },
         "required": [
            "type",
            "range"
         ],
         "additionalProperties": false
      },
      "SingleBatchBaseline": {
         "title": "SingleBatchBaseline",
         "description": "Using current batch.\n\nThis is used when you want to use one batch to monitor another batch in a different metric entity.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of an dataset. This is specific to WhyLabs. If the dataset ID does not exist, user will get a validation exception when saving the config with WhyLabs API",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "inheritSegment": {
               "title": "InheritSegment",
               "description": "Default to false. Whether to use the segment from the target to filter down the baseline",
               "type": "boolean"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "CurrentBatch"
               ],
               "type": "string"
            },
            "offset": {
               "title": "Offset",
               "description": "Offset from the current batch for the baseline. Default to 0 - (the current batch). This means that if this field set this to 0, the baseline be the current batch's value. The dataset fieldis required to be set for this baseline config.Typical use case is to use another entity to monitor against the current entity",
               "type": "integer"
            }
         },
         "required": [
            "datasetId",
            "type"
         ],
         "additionalProperties": false
      },
      "DiffConfig": {
         "title": "DiffConfig",
         "description": "Detecting the differences between two numerical metrics.",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this valueis not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "description": "The target metric. This field cannot be change once the analyzer is created.",
               "anyOf": [
                  {
                     "$ref": "#/definitions/DatasetMetric"
                  },
                  {
                     "$ref": "#/definitions/SimpleColumnMetric"
                  },
                  {
                     "type": "string",
                     "maxLength": 100
                  }
               ]
            },
            "type": {
               "title": "Type",
               "enum": [
                  "diff"
               ],
               "type": "string"
            },
            "mode": {
               "$ref": "#/definitions/DiffMode"
            },
            "thresholdType": {
               "$ref": "#/definitions/ThresholdType"
            },
            "threshold": {
               "title": "Threshold",
               "description": "The minimum threshold that will trigger an anomaly. The monitor detect the difference betweenthe target's metric and the baseline metric. Both of these metrics MUST be in rolled up form",
               "type": "number"
            },
            "baseline": {
               "title": "Baseline",
               "oneOf": [
                  {
                     "$ref": "#/definitions/TrailingWindowBaseline"
                  },
                  {
                     "$ref": "#/definitions/ReferenceProfileId"
                  },
                  {
                     "$ref": "#/definitions/TimeRangeBaseline"
                  },
                  {
                     "$ref": "#/definitions/SingleBatchBaseline"
                  }
               ]
            }
         },
         "required": [
            "metric",
            "type",
            "mode",
            "threshold",
            "baseline"
         ],
         "additionalProperties": false
      },
      "ComparisonOperator": {
         "title": "ComparisonOperator",
         "description": "Operators for performing a comparison.",
         "enum": [
            "eq",
            "gt",
            "lt",
            "ge",
            "le"
         ],
         "type": "string"
      },
      "ExpectedValue": {
         "title": "ExpectedValue",
         "description": "Expected value: one of these fields must be set.",
         "type": "object",
         "properties": {
            "str": {
               "title": "Str",
               "maxLength": 100,
               "type": "string"
            },
            "int": {
               "title": "Int",
               "type": "integer"
            },
            "float": {
               "title": "Float",
               "type": "number"
            }
         },
         "additionalProperties": false
      },
      "ComparisonConfig": {
         "title": "ComparisonConfig",
         "description": "Compare whether the target against a value or against a baseline's metric.\n\nThis is useful to detect data type change, for instance.",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this valueis not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "description": "The target metric. This field cannot be change once the analyzer is created.",
               "anyOf": [
                  {
                     "$ref": "#/definitions/DatasetMetric"
                  },
                  {
                     "$ref": "#/definitions/SimpleColumnMetric"
                  },
                  {
                     "type": "string",
                     "maxLength": 100
                  }
               ]
            },
            "type": {
               "title": "Type",
               "enum": [
                  "comparison"
               ],
               "type": "string"
            },
            "operator": {
               "description": "The operator for the comparison. The right hand side is the target batch's metric. The left handside is the expected value or a baseline's metric.",
               "allOf": [
                  {
                     "$ref": "#/definitions/ComparisonOperator"
                  }
               ]
            },
            "expected": {
               "title": "Expected",
               "description": "The expected value of the equality. If the value is not set we will extract the corresponding metric from the baseline and perform the comparison",
               "allOf": [
                  {
                     "$ref": "#/definitions/ExpectedValue"
                  }
               ]
            },
            "baseline": {
               "title": "Baseline",
               "oneOf": [
                  {
                     "$ref": "#/definitions/TrailingWindowBaseline"
                  },
                  {
                     "$ref": "#/definitions/ReferenceProfileId"
                  },
                  {
                     "$ref": "#/definitions/TimeRangeBaseline"
                  },
                  {
                     "$ref": "#/definitions/SingleBatchBaseline"
                  }
               ]
            }
         },
         "required": [
            "metric",
            "type",
            "operator"
         ],
         "additionalProperties": false
      },
      "ListComparisonOperator": {
         "title": "ListComparisonOperator",
         "description": "Operators for performing a comparison.",
         "enum": [
            "in",
            "not_in"
         ],
         "type": "string"
      },
      "ListComparisonConfig": {
         "title": "ListComparisonConfig",
         "description": "Compare the target against a list of values.",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this value is not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "description": "The target metric. This field cannot be changed once the analyzer is created.",
               "anyOf": [
                  {
                     "$ref": "#/definitions/DatasetMetric"
                  },
                  {
                     "$ref": "#/definitions/SimpleColumnMetric"
                  },
                  {
                     "type": "string",
                     "maxLength": 100
                  }
               ]
            },
            "type": {
               "title": "Type",
               "enum": [
                  "list_comparison"
               ],
               "type": "string"
            },
            "operator": {
               "description": "The operator for the comparison. The right-hand side is the target batch's metric. The left-hand side is the expected value or a baseline's metric.",
               "allOf": [
                  {
                     "$ref": "#/definitions/ListComparisonOperator"
                  }
               ]
            },
            "expected": {
               "title": "Expected",
               "description": "The expected values of the equality. If the value is not set we will extract the corresponding metric from the baseline and perform the comparison",
               "type": "array",
               "items": {
                  "$ref": "#/definitions/ExpectedValue"
               }
            },
            "baseline": {
               "title": "Baseline",
               "oneOf": [
                  {
                     "$ref": "#/definitions/TrailingWindowBaseline"
                  },
                  {
                     "$ref": "#/definitions/ReferenceProfileId"
                  },
                  {
                     "$ref": "#/definitions/TimeRangeBaseline"
                  },
                  {
                     "$ref": "#/definitions/SingleBatchBaseline"
                  }
               ]
            }
         },
         "required": [
            "metric",
            "type",
            "operator"
         ],
         "additionalProperties": false
      },
      "ColumnListChangeConfig": {
         "title": "ColumnListChangeConfig",
         "description": "Compare whether the target is equal to a value or not.\n\nThis is useful to detect data type change, for instance.",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this value is not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "enum": [
                  "column_list"
               ],
               "type": "string"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "column_list"
               ],
               "type": "string"
            },
            "mode": {
               "title": "Mode",
               "default": "ON_ADD_AND_REMOVE",
               "enum": [
                  "ON_ADD_AND_REMOVE",
                  "ON_ADD",
                  "ON_REMOVE"
               ],
               "type": "string"
            },
            "exclude": {
               "title": "Exclude",
               "description": "Ignore these column names. User can specify a list of regex",
               "maxItems": 1000,
               "type": "array",
               "items": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "baseline": {
               "title": "Baseline",
               "oneOf": [
                  {
                     "$ref": "#/definitions/TrailingWindowBaseline"
                  },
                  {
                     "$ref": "#/definitions/ReferenceProfileId"
                  },
                  {
                     "$ref": "#/definitions/TimeRangeBaseline"
                  },
                  {
                     "$ref": "#/definitions/SingleBatchBaseline"
                  }
               ]
            }
         },
         "required": [
            "metric",
            "type",
            "baseline"
         ],
         "additionalProperties": false
      },
      "FixedThresholdsConfig": {
         "title": "FixedThresholdsConfig",
         "description": "Fixed threshold analysis.\n\nIf the user sets neither an upper bound nor a lower bound, this algorithm becomes a no-op.\nWhyLabs might enforce the presence of either field in the future.",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this value is not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "description": "The target metric. This field cannot be changed once the analyzer is created.",
               "anyOf": [
                  {
                     "$ref": "#/definitions/DatasetMetric"
                  },
                  {
                     "$ref": "#/definitions/SimpleColumnMetric"
                  },
                  {
                     "type": "string",
                     "maxLength": 100
                  }
               ]
            },
            "type": {
               "title": "Type",
               "enum": [
                  "fixed"
               ],
               "type": "string"
            },
            "upper": {
               "title": "Upper",
               "description": "Upper bound of the static threshold",
               "type": "number"
            },
            "lower": {
               "title": "Lower",
               "description": "Lower bound of the static threshold",
               "type": "number"
            }
         },
         "required": [
            "metric",
            "type"
         ],
         "additionalProperties": false
      },
      "StddevConfig": {
         "title": "StddevConfig",
         "description": "Calculates upper bounds and lower bounds based on stddev from a series of numbers.\n\nAn analyzer using stddev for a window of time range.\n\nThis calculation will fall back to Poisson distribution if there is only 1 value in the baseline.\nFor 2 values, we use the formula sqrt(sum((x_i - avg(x))^2) / (n - 1))",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this value is not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "description": "The target metric. This field cannot be changed once the analyzer is created.",
               "anyOf": [
                  {
                     "$ref": "#/definitions/DatasetMetric"
                  },
                  {
                     "$ref": "#/definitions/SimpleColumnMetric"
                  },
                  {
                     "type": "string",
                     "maxLength": 100
                  }
               ]
            },
            "maxUpperThreshold": {
               "title": "Maxupperthreshold",
               "description": "Capping the threshold by this value. This value only becomes effective if the calculated upper threshold from the calculation is greater than this value.",
               "type": "number"
            },
            "minLowerThreshold": {
               "title": "Minlowerthreshold",
               "description": "Capping the minimum threshold by this value. This value only becomes effective if the calculated lower threshold from the calculation is less than this value.",
               "type": "number"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "stddev"
               ],
               "type": "string"
            },
            "factor": {
               "title": "Factor",
               "description": "The multiplier used with stddev to build the upper and lower bounds.",
               "default": 3.0,
               "type": "number"
            },
            "minBatchSize": {
               "title": "MinBatchSize",
               "description": "Minimum number of batches that is required",
               "default": 1,
               "minimum": 1,
               "type": "integer"
            },
            "baseline": {
               "title": "Baseline",
               "oneOf": [
                  {
                     "$ref": "#/definitions/TrailingWindowBaseline"
                  },
                  {
                     "$ref": "#/definitions/TimeRangeBaseline"
                  },
                  {
                     "$ref": "#/definitions/ReferenceProfileId"
                  }
               ]
            }
         },
         "required": [
            "metric",
            "type",
            "baseline"
         ],
         "additionalProperties": false
      },
      "DriftConfig": {
         "title": "DriftConfig",
         "description": "An analyzer using stddev for a window of time range.\n\nThis analysis will detect whether the data drifts or not. By default, we use hellinger distance with a threshold\nof 0.7.",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this value is not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "enum": [
                  "histogram",
                  "frequent_items"
               ],
               "type": "string"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "drift"
               ],
               "type": "string"
            },
            "algorithm": {
               "title": "Algorithm",
               "description": "The algorithm to use when calculating drift.",
               "default": "hellinger",
               "enum": [
                  "hellinger",
                  "ks_test",
                  "kl_divergence",
                  "variation_distance"
               ],
               "type": "string"
            },
            "threshold": {
               "title": "Threshold",
               "description": "The threshold for the distance algorithm. Depending on the algorithm, this threshold is used for greater than or less than comparison.",
               "default": 0.7,
               "type": "number"
            },
            "minBatchSize": {
               "title": "MinBatchSize",
               "description": "Minimum number of batches that is required",
               "default": 1,
               "minimum": 1,
               "type": "integer"
            },
            "baseline": {
               "title": "Baseline",
               "oneOf": [
                  {
                     "$ref": "#/definitions/TrailingWindowBaseline"
                  },
                  {
                     "$ref": "#/definitions/ReferenceProfileId"
                  },
                  {
                     "$ref": "#/definitions/TimeRangeBaseline"
                  },
                  {
                     "$ref": "#/definitions/SingleBatchBaseline"
                  }
               ]
            }
         },
         "required": [
            "metric",
            "type",
            "baseline"
         ],
         "additionalProperties": false
      },
      "AlgorithmType": {
         "title": "AlgorithmType",
         "description": "Specify the algorithm type.",
         "enum": [
            "expected",
            "column_list",
            "comparison",
            "list_comparison",
            "diff",
            "drift",
            "stddev",
            "seasonal",
            "fixed",
            "experimental"
         ],
         "type": "string"
      },
      "ExperimentalConfig": {
         "title": "ExperimentalConfig",
         "description": "Experimental algorithm that is not standardized by the above ones yet.",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this value is not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "description": "The target metric. This field cannot be changed once the analyzer is created.",
               "anyOf": [
                  {
                     "$ref": "#/definitions/DatasetMetric"
                  },
                  {
                     "$ref": "#/definitions/SimpleColumnMetric"
                  },
                  {
                     "type": "string",
                     "maxLength": 100
                  }
               ]
            },
            "type": {
               "title": "Type",
               "enum": [
                  "experimental"
               ],
               "type": "string"
            },
            "implementation": {
               "title": "Implementation",
               "description": "The implementation of an experimental config",
               "maxLength": 100,
               "type": "string"
            },
            "baseline": {
               "title": "Baseline",
               "oneOf": [
                  {
                     "$ref": "#/definitions/TrailingWindowBaseline"
                  },
                  {
                     "$ref": "#/definitions/ReferenceProfileId"
                  },
                  {
                     "$ref": "#/definitions/TimeRangeBaseline"
                  },
                  {
                     "$ref": "#/definitions/SingleBatchBaseline"
                  }
               ]
            },
            "stub": {
               "description": "Stub field to flow algorithm type into the schema. Do not use.",
               "allOf": [
                  {
                     "$ref": "#/definitions/AlgorithmType"
                  }
               ]
            }
         },
         "required": [
            "metric",
            "type",
            "implementation",
            "baseline"
         ],
         "additionalProperties": false
      },
      "SeasonalConfig": {
         "title": "SeasonalConfig",
         "description": "An analyzer using stddev for a window of time range.\n\nThis will fall back to Poisson distribution if there is only 1 value in the baseline.\n\nThis only works with TrailingWindow baseline (TODO: add backend validation)",
         "type": "object",
         "properties": {
            "schemaVersion": {
               "title": "SchemaVersion",
               "description": "The schema version of an algorithm. Typically this value is not required.",
               "type": "integer"
            },
            "params": {
               "title": "Params",
               "description": "Extra parameters for the algorithm",
               "type": "object",
               "additionalProperties": {
                  "type": "string",
                  "maxLength": 1000
               }
            },
            "metric": {
               "title": "Metric",
               "description": "The target metric. This field cannot be changed once the analyzer is created.",
               "anyOf": [
                  {
                     "$ref": "#/definitions/DatasetMetric"
                  },
                  {
                     "$ref": "#/definitions/SimpleColumnMetric"
                  },
                  {
                     "type": "string",
                     "maxLength": 100
                  }
               ]
            },
            "maxUpperThreshold": {
               "title": "Maxupperthreshold",
               "description": "Capping the threshold by this value. This value only becomes effective if the calculated upper threshold from the calculation is greater than this value.",
               "type": "number"
            },
            "minLowerThreshold": {
               "title": "Minlowerthreshold",
               "description": "Capping the minimum threshold by this value. This value only becomes effective if the calculated lower threshold from the calculation is less than this value.",
               "type": "number"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "seasonal"
               ],
               "type": "string"
            },
            "algorithm": {
               "title": "Algorithm",
               "description": "The algorithm implementation for seasonal analysis",
               "default": "arima",
               "enum": [
                  "arima",
                  "rego",
                  "stastforecast"
               ],
               "type": "string"
            },
            "minBatchSize": {
               "title": "MinBatchSize",
               "description": "Minimum number of batches that is required",
               "default": 30,
               "type": "integer"
            },
            "alpha": {
               "title": "Alpha",
               "description": "Significance level for the confidence interval produced around predictions. If 0.05, then the algorithm will calculate a 95% confidence interval around predictions.",
               "default": 0.05,
               "type": "number"
            },
            "baseline": {
               "$ref": "#/definitions/TrailingWindowBaseline"
            },
            "stddevTimeRanges": {
               "title": "StddevTimeRanges",
               "description": "Ranges of time where we will apply standard deviation for confidence intervals rather than the confidence interval from the algorithm. This is to prevent data from special events from making the bands very wide for timeseries-based predictions.",
               "type": "array",
               "items": {
                  "$ref": "#/definitions/TimeRange"
               }
            },
            "stddevMaxBatchSize": {
               "title": "Stddevmaxbatchsize",
               "description": "Maximum number of data points to consider for calculating stddev. These are the data points preceding the target batch.",
               "type": "integer"
            },
            "stddevFactor": {
               "title": "Stddevfactor",
               "description": "The multiplier factor for calculating upper bounds and lower bounds from the prediction.",
               "default": 1.0,
               "type": "number"
            }
         },
         "required": [
            "metric",
            "type",
            "baseline"
         ],
         "additionalProperties": false
      }
   }
}

Config
  • schema_extra: function = <function Analyzer.Config.schema_extra at 0xffff85802f70>

Fields
field backfillGracePeriodDuration: Optional[str] = None#

ISO 8601 duration format. How far back an analyzer will attempt to backfill late data. Note that we will only backfill batches not previously analyzed. If the batch was already analyzed, even with partial data, the backfill will ignore the new data unless you trigger an explicit backfill request. We support 48 hours for hourly data, 30 days for daily data, and 6 months for monthly data.

Constraints
  • pattern = ^P(?!$)(\d+M)?(\d+W)?(\d+D)?(T(?=\d+[HM])(\d+H)?(\d+M)?)?$

field batchCoolDownPeriod: Optional[str] = None#

ISO 8601 duration format. Specifies the duration that the monitor will wait from the last time a profile arrives. Any batch involved in the calculation must have received the last profile by the duration.

Constraints
  • pattern = ^P(?!$)(\d+M)?(\d+W)?(\d+D)?(T(?=\d+[HM])(\d+H)?(\d+M)?)?$

field config: Union[DiffConfig, ComparisonConfig, ListComparisonConfig, ColumnListChangeConfig, FixedThresholdsConfig, StddevConfig, DriftConfig, ExperimentalConfig, SeasonalConfig] [Required]#

The configuration map of the analyzer

field dataReadinessDuration: Optional[str] = None#

ISO 8601 duration format. The duration determines how fast data is ready for the monitor. For example, if your pipeline takes 2 days to deliver profiles to WhyLabs, the value should be P2D. Note that this value will be used to evaluate missing data as well.

Constraints
  • pattern = ^P(?!$)(\d+M)?(\d+W)?(\d+D)?(T(?=\d+[HM])(\d+H)?(\d+M)?)?$

field disabled: Optional[bool] = None#

Whether the analyzer is disabled. This allows the user to keep the configuration around without having to delete the analyzer config.

field displayName: Optional[str] = None#

A display name for the analyzer when viewed through the WhyLabs UI. Can only contain dashes, underscores, spaces, and alphanumeric characters.

Constraints
  • maxLength = 256

  • minLength = 10

  • pattern = [0-9a-zA-Z \-_]+

field id: str = None#

A unique, human-readable ID for an analyzer. Can only contain alphanumeric characters, underscores, and dashes.

Constraints
  • maxLength = 128

  • minLength = 10

  • pattern = [0-9a-zA-Z\-_]+

field metadata: Optional[Metadata] = None#

WhyLabs-managed metadata. This is to track various metadata for auditing.

field schedule: Optional[Union[CronSchedule, FixedCadenceSchedule]] = None#

A schedule for running the analyzer. If not set, the analyzer is considered disabled.

field tags: Optional[List[ConstrainedStrValue]] = None#

A list of tags that are associated with the analyzer.

field targetMatrix: Union[ColumnMatrix, DatasetMatrix] [Required]#

A matrix for possible locations of the target