🚩 Create a free WhyLabs account to get more value out of whylogs!
Did you know you can store, visualize, and monitor whylogs profiles with the WhyLabs Observability Platform? Sign up for a free WhyLabs account to leverage the power of whylogs and WhyLabs together!
# Note: you may need to restart the kernel to use updated packages.
%pip install whylogs
If all of the state of the metric can be represented by subclasses of `MetricComponent`, it's very simple to create a new metric. There are a number of standard metric components in `metric_components.py`. You can also create new components by subclassing `CustomComponent`.
from dataclasses import dataclass
from typing import Any, Dict, List
import pickle
import whylogs_sketching as ds # type: ignore
from whylogs.core.configs import SummaryConfig
from whylogs.core.dataset_profile import DatasetProfile
from whylogs.core.datatypes import DataType
from whylogs.core.metrics.metric_components import KllComponent
from whylogs.core.metrics.metrics import CustomMetricBase, Metric, MetricConfig, OperationResult
from whylogs.core.preprocessing import PreprocessedColumn
from whylogs.core.resolvers import Resolver
from whylogs.core.schema import DatasetSchema
from whylogs.core.preprocessing import PreprocessedColumn
from whylogs.core.proto import MetricMessage, MetricComponentMessage
# Metric classes should be decorated with @dataclass
@dataclass(frozen=True)
class HistogramMetric(Metric):
    """Example custom metric that tracks numeric values in a KLL sketch.

    The dataclass is frozen, so the ``histogram`` field is never rebound;
    updates mutate the sketch held in ``histogram.value`` in place.
    """

    histogram: KllComponent  # All the fields are subclasses of MetricComponent

    # you must implement namespace returning a unique string to identify your metric
    @property
    def namespace(self) -> str:
        """Return the unique string that identifies this metric."""
        return "histogram"

    # you must implement to_summary_dict returning a summary of your metric
    def to_summary_dict(self, cfg: SummaryConfig) -> Dict[str, Any]:
        """Summarize the sketch as count, extrema and selected quantiles.

        Args:
            cfg: Summary configuration; not consulted by this metric.

        Returns:
            A dict with keys ``n``, ``max``, ``min``, ``q_10``, ``q_25``,
            ``median``, ``q_75`` and ``q_90``. The five quantile entries are
            ``None`` when the sketch has seen no values.
        """
        sketch = self.histogram.value
        if sketch.get_n() == 0:
            # Quantiles are not requested from an empty sketch.
            quantiles = [None, None, None, None, None]
        else:
            quantiles = sketch.get_quantiles([0.1, 0.25, 0.5, 0.75, 0.9])
        return {
            "n": sketch.get_n(),
            "max": sketch.get_max_value(),
            "min": sketch.get_min_value(),
            "q_10": quantiles[0],
            "q_25": quantiles[1],
            "median": quantiles[2],
            "q_75": quantiles[3],
            "q_90": quantiles[4],
        }

    # columnar_update updates your metric as data is logged
    def columnar_update(self, data: PreprocessedColumn) -> OperationResult:
        """Feed the numeric values of ``data`` into the sketch.

        Both the numpy view (``floats``/``ints`` arrays) and the list view
        (``ints``/``floats`` lists) of the column are consumed.

        Args:
            data: The preprocessed column being logged.

        Returns:
            An ``OperationResult`` whose success count is the number of
            values passed to the sketch.
        """
        successes = 0
        if data.numpy.len > 0:
            for arr in [data.numpy.floats, data.numpy.ints]:
                if arr is not None:
                    self.histogram.value.update(arr)
                    # Count what was ingested so the result is not always 0.
                    successes += len(arr)
        for lst in [data.list.ints, data.list.floats]:
            if lst is not None and len(lst) > 0:
                self.histogram.value.update_list(lst)
                successes += len(lst)
        return OperationResult.ok(successes)

    # The zero method returns an "empty" instance of your metric ready to start tracking data
    # If your metric needs configuration, create a subclass of MetricConfig containing your
    # parameters.
    @classmethod
    def zero(cls, config: MetricConfig) -> "HistogramMetric":
        """Return an empty metric whose sketch is sized by ``config.kll_k``."""
        return cls(histogram=KllComponent(ds.kll_doubles_sketch(k=config.kll_k)))
If you prefer not to use `MetricComponent` fields for your metric, you can instead make your metric a subclass of `CustomMetricBase`. All fields whose names don't start with `_` will be included in the metric summary and serialized via protobuf.
@dataclass
class StructMetric(CustomMetricBase):
    """Example metric that keeps its state in plain dataclass fields."""

    x: int
    s: str
    _private: float = 3.14159  # leading underscore: left out of summary and protobuf

    @property
    def namespace(self) -> str:
        """Return the unique string that identifies this metric."""
        return "struct"

    # a merge implementation has to be supplied by the subclass
    def merge(self, other: "StructMetric") -> "StructMetric":
        """Return a new metric with the summed ``x`` and concatenated ``s``."""
        combined_x = self.x + other.x
        combined_s = self.s + other.s
        return StructMetric(combined_x, combined_s)

    def columnar_update(self, data: PreprocessedColumn) -> OperationResult:
        """Bump ``x`` by one and append ``"a"`` to ``s``; ``data`` is not inspected."""
        self.x = self.x + 1
        self.s = self.s + "a"
        return OperationResult.ok(1)

    @classmethod
    def zero(cls, config: MetricConfig) -> "StructMetric":
        """Return an empty metric (``x == 0``, ``s == ""``); ``config`` is unused."""
        return cls(x=0, s="")
You will need to create a `Resolver` and `DatasetSchema` in order to use your metric.
from whylogs.core import ColumnSchema
class TestResolver(Resolver):
    """Resolver that attaches the two example metrics to every column."""

    def resolve(self, name: str, why_type: DataType, column_schema: ColumnSchema) -> Dict[str, Metric]:
        """Return fresh ``histogram`` and ``struct`` metrics for a column.

        ``name`` and ``why_type`` are not consulted; only ``column_schema.cfg``
        is used, to configure the histogram metric.
        """
        metrics: Dict[str, Metric] = {}
        metrics["histogram"] = HistogramMetric.zero(column_schema.cfg)
        metrics["struct"] = StructMetric(0, "")
        return metrics
# A single sample row to log.
row = {"col1": 1.2}

# Schema for one float column whose metrics come from TestResolver.
schema = DatasetSchema(resolvers=TestResolver(), types={"col1": float})
prof = DatasetProfile(schema)

# Log the row, then show the resulting profile as a DataFrame.
prof.track(row=row)
prof.view().to_pandas()
histogram/n | histogram/max | histogram/min | histogram/q_10 | histogram/q_25 | histogram/median | histogram/q_75 | histogram/q_90 | struct/x | struct/s | type | |
---|---|---|---|---|---|---|---|---|---|---|---|
column | |||||||||||
col1 | 1 | 1.2 | 1.2 | 1.2 | 1.2 | 1.2 | 1.2 | 1.2 | 2 | aa | SummaryType.COLUMN |