monitor_schema.models.StddevConfig#

class monitor_schema.models.StddevConfig[source]#

Calculates upper bounds and lower bounds based on stddev from a series of numbers.

An analyzer using stddev for a window of time range.

This calculation will fall back to a Poisson distribution if there is only 1 value in the baseline. For 2 values, we use the sample standard deviation formula sqrt(sum((x_i - avg(x))^2) / (n - 1))

Show JSON schema
{
   "title": "StddevConfig",
   "description": "Calculates upper bounds and lower bounds based on stddev from a series of numbers.\n\nAn analyzer using stddev for a window of time range.\n\nThis calculation will fall back to a Poisson distribution if there is only 1 value in the baseline.\nFor 2 values, we use the sample standard deviation formula sqrt(sum((x_i - avg(x))^2) / (n - 1))",
   "type": "object",
   "properties": {
      "schemaVersion": {
         "title": "SchemaVersion",
         "description": "The schema version of an algorithm. Typically this value is not required.",
         "type": "integer"
      },
      "params": {
         "title": "Params",
         "description": "Extra parameters for the algorithm",
         "type": "object",
         "additionalProperties": {
            "type": "string",
            "maxLength": 1000
         }
      },
      "metric": {
         "title": "Metric",
         "description": "The target metric. This field cannot be changed once the analyzer is created.",
         "anyOf": [
            {
               "$ref": "#/definitions/DatasetMetric"
            },
            {
               "$ref": "#/definitions/SimpleColumnMetric"
            },
            {
               "type": "string",
               "maxLength": 100
            }
         ]
      },
      "maxUpperThreshold": {
         "title": "Maxupperthreshold",
         "description": "Capping the threshold by this value. This value only becomes effective if the calculated upper threshold from the calculation is greater than this value.",
         "type": "number"
      },
      "minLowerThreshold": {
         "title": "Minlowerthreshold",
         "description": "Capping the minimum threshold by this value. This value only becomes effective if the calculated lower threshold from the calculation is less than this value.",
         "type": "number"
      },
      "thresholdType": {
         "$ref": "#/definitions/ThresholdType"
      },
      "type": {
         "title": "Type",
         "enum": [
            "stddev"
         ],
         "type": "string"
      },
      "factor": {
         "title": "Factor",
         "description": "The multiplier used with stddev to build the upper and lower bounds.",
         "default": 3.0,
         "type": "number"
      },
      "minBatchSize": {
         "title": "MinBatchSize",
         "description": "Minimum number of batches that is required",
         "default": 1,
         "minimum": 1,
         "type": "integer"
      },
      "baseline": {
         "title": "Baseline",
         "description": "A baseline for running the analyzer.",
         "discriminator": {
            "propertyName": "type",
            "mapping": {
               "TrailingWindow": "#/definitions/TrailingWindowBaseline",
               "TimeRange": "#/definitions/TimeRangeBaseline",
               "Reference": "#/definitions/ReferenceProfileId"
            }
         },
         "oneOf": [
            {
               "$ref": "#/definitions/TrailingWindowBaseline"
            },
            {
               "$ref": "#/definitions/TimeRangeBaseline"
            },
            {
               "$ref": "#/definitions/ReferenceProfileId"
            }
         ]
      }
   },
   "required": [
      "metric",
      "type",
      "baseline"
   ],
   "additionalProperties": false,
   "definitions": {
      "DatasetMetric": {
         "title": "DatasetMetric",
         "description": "Metrics that are applicable at the dataset level.",
         "enum": [
            "profile.count",
            "profile.last_ingestion_time",
            "profile.first_ingestion_time",
            "column_row_count_sum",
            "shape_column_count",
            "shape_row_count",
            "input.count",
            "output.count",
            "classification.f1",
            "classification.precision",
            "classification.recall",
            "classification.accuracy",
            "classification.fpr",
            "classification.auroc",
            "regression.mse",
            "regression.mae",
            "regression.rmse"
         ],
         "type": "string"
      },
      "SimpleColumnMetric": {
         "title": "SimpleColumnMetric",
         "description": "Simple column metrics that are basically just a single number.",
         "enum": [
            "count",
            "median",
            "max",
            "min",
            "mean",
            "stddev",
            "variance",
            "unique_upper",
            "unique_upper_ratio",
            "unique_est",
            "unique_est_ratio",
            "unique_lower",
            "unique_lower_ratio",
            "count_bool",
            "count_bool_ratio",
            "count_integral",
            "count_integral_ratio",
            "count_fractional",
            "count_fractional_ratio",
            "count_string",
            "count_string_ratio",
            "count_null",
            "count_null_ratio",
            "inferred_data_type",
            "quantile_5",
            "quantile_75",
            "quantile_25",
            "quantile_90",
            "quantile_95",
            "quantile_99"
         ],
         "type": "string"
      },
      "ThresholdType": {
         "title": "ThresholdType",
         "description": "Threshold Type declaring the upper and lower bound.\n\nBy default an anomaly will be generated when the target is above or below the baseline\nby the specified threshold.\n\nIf it's only desirable to alert when the target is above the\nbaseline and not the other way around, specify upper for your ThresholdType.",
         "enum": [
            "lower",
            "upper"
         ]
      },
      "TimeRange": {
         "title": "TimeRange",
         "description": "Support for a specific time range.",
         "type": "object",
         "properties": {
            "start": {
               "title": "Start",
               "description": "Inclusive. Start time of a time range.",
               "type": "string",
               "format": "date-time"
            },
            "end": {
               "title": "End",
               "description": "Exclusive. End time of a time range.",
               "type": "string",
               "format": "date-time"
            }
         },
         "required": [
            "start",
            "end"
         ],
         "additionalProperties": false
      },
      "TrailingWindowBaseline": {
         "title": "TrailingWindowBaseline",
         "description": "A dynamic trailing window.\n\nThis is useful if you don't have a static baseline to monitor against. This is the default mode for most\nmonitors.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of a dataset. This is specific to WhyLabs. If the dataset ID does not exist, the user will get a validation exception when saving the config with the WhyLabs API.",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "inheritSegment": {
               "title": "InheritSegment",
               "description": "Default to false. Whether to use the segment from the target to filter down the baseline",
               "type": "boolean"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "TrailingWindow"
               ],
               "type": "string"
            },
            "size": {
               "title": "Size",
               "description": "Window size",
               "exclusiveMinimum": 3,
               "maximum": 90,
               "type": "integer"
            },
            "offset": {
               "title": "Offset",
               "description": "Offset from the current batch for the range of the trailing window. Defaults to 1 (the previous batch). This means that if this is set to 0, the baseline will include the current batch's value, or if it is set to 7, then the window is offset by 7.",
               "type": "integer"
            },
            "exclusionRanges": {
               "title": "ExclusionRanges",
               "description": "The list of exclusion ranges",
               "maxItems": 100,
               "type": "array",
               "items": {
                  "$ref": "#/definitions/TimeRange"
               }
            }
         },
         "required": [
            "type",
            "size"
         ],
         "additionalProperties": false
      },
      "TimeRangeBaseline": {
         "title": "TimeRangeBaseline",
         "description": "A static time range.\n\nInstead of using a single profile or a trailing window, user can lock in a \"good\" period.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of a dataset. This is specific to WhyLabs. If the dataset ID does not exist, the user will get a validation exception when saving the config with the WhyLabs API.",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "inheritSegment": {
               "title": "InheritSegment",
               "description": "Default to false. Whether to use the segment from the target to filter down the baseline",
               "type": "boolean"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "TimeRange"
               ],
               "type": "string"
            },
            "range": {
               "title": "Range",
               "description": "The range to set the time range with",
               "allOf": [
                  {
                     "$ref": "#/definitions/TimeRange"
                  }
               ]
            }
         },
         "required": [
            "type",
            "range"
         ],
         "additionalProperties": false
      },
      "ReferenceProfileId": {
         "title": "ReferenceProfileId",
         "description": "A baseline based on a static reference profile.\n\nA typical use case is to use a \"gold\" dataset and upload its profile to WhyLabs. This can be a training dataset\nas well for an ML model.",
         "type": "object",
         "properties": {
            "datasetId": {
               "title": "DatasetId",
               "description": "The unique ID of a dataset. This is specific to WhyLabs. If the dataset ID does not exist, the user will get a validation exception when saving the config with the WhyLabs API.",
               "maxLength": 100,
               "pattern": "[a-zA-Z0-9\\-_\\.]+",
               "type": "string"
            },
            "type": {
               "title": "Type",
               "enum": [
                  "Reference"
               ],
               "type": "string"
            },
            "profileId": {
               "title": "ProfileId",
               "description": "The unique profile ID for the reference profile",
               "maxLength": 100,
               "type": "string"
            }
         },
         "required": [
            "type",
            "profileId"
         ],
         "additionalProperties": false
      }
   }
}

Config:
  • schema_extra: function = <function AlgorithmConfig.Config.schema_extra at 0x1090d61f0>

Fields:
field baseline: TrailingWindowBaseline | TimeRangeBaseline | ReferenceProfileId [Required]#

A baseline for running the analyzer.

field factor: float | None = 3.0#

The multiplier used with stddev to build the upper and lower bounds.

field minBatchSize: int | None = 1#

Minimum number of batches that is required

Constraints:
  • minimum = 1

field type: Literal[<AlgorithmType.stddev: 'stddev'>] [Required]#