monitor_schema.models.DriftConfig#

class monitor_schema.models.DriftConfig[source]#

An analyzer that detects data drift against a baseline.

This analysis will detect whether the data drifts or not. By default, we use hellinger distance with a threshold of 0.7.

Show JSON schema
{
   "title": "DriftConfig",
   "description": "An analyzer using stddev for a window of time range.\n\nThis analysis will detect whether the data drifts or not. By default, we use hellinger distance with a threshold\nof 0.7.",
   "type": "object",
   "properties": {
      "schemaVersion": {
         "title": "SchemaVersion",
         "description": "The schema version of an algorithm. Typically this valueis not required.",
         "type": "integer"
      },
      "params": {
         "title": "Params",
         "description": "Extra parameters for the algorithm",
         "type": "object",
         "additionalProperties": {
            "type": "string",
            "maxLength": 1000
         }
      },
      "metric": {
         "title": "Metric",
         "enum": [
            "histogram",
            "frequent_items"
         ],
         "type": "string"
      },
      "type": {
         "title": "Type",
         "enum": [
            "drift"
         ],
         "type": "string"
      },
      "algorithm": {
         "title": "Algorithm",
         "description": "The algorithm to use when calculating drift.",
         "default": "hellinger",
         "enum": [
            "hellinger",
            "ks_test",
            "kl_divergence",
            "variation_distance"
         ],
         "type": "string"
      },
      "threshold": {
         "title": "Threshold",
         "description": "The threshold for the distance algorithm. Depending on the algorithm, this thresholdis used for greater than or less than comparison.",
         "default": 0.7,
         "type": "number"
      },
      "minBatchSize": {
         "title": "MinBatchSize",
         "description": "Minimum number of batches that is required",
         "default": 1,
         "minimum": 1,
         "type": "integer"
      },
      "baseline": {
         "title": "Baseline",
         "oneOf": [
            {
               "$ref": "#/definitions/TrailingWindowBaseline"
            },
            {
               "$ref": "#/definitions/ReferenceProfileId"
            },
            {
               "$ref": "#/definitions/TimeRangeBaseline"
            },
            {
               "$ref": "#/definitions/SingleBatchBaseline"
            }
         ]
      }
   },
   "required": [
      "metric",
      "type",
      "baseline"
   ],
   "additionalProperties": false,
   "definitions": {
      "TimeRange": {
         "title": "TimeRange",
         "description": "Support for a specific time range.",
         "type": "object",
         "properties": {
            "start": {
               "title": "Start",
               "description": "Inclusive. Start time of a time range.",
               "type": "string",
               "format": "date-time"
            },
            "end": {
               "title": "End",
               "description": "Exclusive. End time of a time range.",
               "type": "string",
               "format": "date-time"
            }
         },
         "required": [
            "start",
            "end"
         ],
         "additionalProperties": false
      },
      "TrailingWindowBaseline": {
         "title": "TrailingWindowBaseline",
         "description": "A dynamic trailing window.\n\nThis is useful if you don't have a static baseline to monitor against. This is the default mode for most\nmonitors.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of an dataset. This is specific to WhyLabs. If the dataset ID does not exist, user will get a validation exception when saving the config with WhyLabs API",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "inheritSegment": {
               "title": "InheritSegment",
               "description": "Default to false. Whether to use the segment from the target to filter down the baseline",
               "type": "boolean"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "TrailingWindow"
               ],
               "type": "string"
            },
            "size": {
               "title": "Size",
               "description": "Window size",
               "exclusiveMinimum": 3,
               "maximum": 90,
               "type": "integer"
            },
            "offset": {
               "title": "Offset",
               "description": "Offset from the current batch for the range of the trailing window. Default to 1 (the previous batch). This means that if set this to 0, the baseline will include the current batch's value, orif we set it o 7, then the window is off by 7.",
               "type": "integer"
            },
            "exclusionRanges": {
               "title": "ExclusionRanges",
               "description": "The list of exclusion ranges",
               "maxItems": 100,
               "type": "array",
               "items": {
                  "$ref": "#/definitions/TimeRange"
               }
            }
         },
         "required": [
            "type",
            "size"
         ],
         "additionalProperties": false
      },
      "ReferenceProfileId": {
         "title": "ReferenceProfileId",
         "description": "A baseline based on a static reference profile.\n\nA typical use case is to use a \"gold\" dataset and upload its profile to WhyLabs. This can be a training dataset\nas well for an ML model.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of an dataset. This is specific to WhyLabs. If the dataset ID does not exist, user will get a validation exception when saving the config with WhyLabs API",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "Reference"
               ],
               "type": "string"
            },
            "profileId": {
               "title": "ProfileId",
               "description": "The unique profile ID for the reference profile",
               "maxLength": 100,
               "type": "string"
            }
         },
         "required": [
            "type",
            "profileId"
         ],
         "additionalProperties": false
      },
      "TimeRangeBaseline": {
         "title": "TimeRangeBaseline",
         "description": "A static time range.\n\nInstead of using a single profile or a trailing window, user can lock in a \"good\" period.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of an dataset. This is specific to WhyLabs. If the dataset ID does not exist, user will get a validation exception when saving the config with WhyLabs API",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "inheritSegment": {
               "title": "InheritSegment",
               "description": "Default to false. Whether to use the segment from the target to filter down the baseline",
               "type": "boolean"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "TimeRange"
               ],
               "type": "string"
            },
            "range": {
               "title": "Range",
               "description": "The range to set the time range with",
               "allOf": [
                  {
                     "$ref": "#/definitions/TimeRange"
                  }
               ]
            }
         },
         "required": [
            "type",
            "range"
         ],
         "additionalProperties": false
      },
      "SingleBatchBaseline": {
         "title": "SingleBatchBaseline",
         "description": "Using current batch.\n\nThis is used when you want to use one batch to monitor another batch in a different metric entity.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of an dataset. This is specific to WhyLabs. If the dataset ID does not exist, user will get a validation exception when saving the config with WhyLabs API",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "inheritSegment": {
               "title": "InheritSegment",
               "description": "Default to false. Whether to use the segment from the target to filter down the baseline",
               "type": "boolean"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "CurrentBatch"
               ],
               "type": "string"
            },
            "offset": {
               "title": "Offset",
               "description": "Offset from the current batch for the baseline. Default to 0 - (the current batch). This means that if this field set this to 0, the baseline be the current batch's value. The dataset fieldis required to be set for this baseline config.Typical use case is to use another entity to monitor against the current entity",
               "type": "integer"
            }
         },
         "required": [
            "datasetId",
            "type"
         ],
         "additionalProperties": false
      }
   }
}

Config
  • schema_extra: function = <function AlgorithmConfig.Config.schema_extra at 0xffff857b2a60>

Fields
field algorithm: Literal['hellinger', 'ks_test', 'kl_divergence', 'variation_distance'] = 'hellinger'#

The algorithm to use when calculating drift.

field baseline: Union[TrailingWindowBaseline, ReferenceProfileId, TimeRangeBaseline, SingleBatchBaseline] [Required]#
field metric: Literal['histogram', 'frequent_items'] [Required]#
field minBatchSize: Optional[int] = 1#

Minimum number of batches that is required

Constraints
  • minimum = 1

field threshold: float = 0.7#

The threshold for the distance algorithm. Depending on the algorithm, this threshold is used for greater than or less than comparison.

field type: Literal['drift'] [Required]#