%pip install whylogs import whylogs as why from whylogs.core.datatypes import Fractional, String from whylogs.experimental.core.udf_schema import ( register_dataset_udf, register_multioutput_udf, register_type_udf, udf_schema ) from whylogs.experimental.core.metrics.udf_metric import register_metric_udf from typing import Any, Dict, List, Union import pandas as pd @register_dataset_udf(["mass", "volume"]) def density(data: Union[Dict[str, List], pd.DataFrame]) -> Union[List, pd.Series]: if isinstance(data, pd.DataFrame): return data["mass"] / data["volume"] else: return [mass / volume for mass, volume in zip(data["mass"], data["volume"])] @register_multioutput_udfs(["x"]) def powers(data: Union[Dict[str, List], pd.DataFrame]) -> Union[Dict[str, List], pd.DataFrame]: if isinstance(data, pd.DataFrame): result = pd.DataFrame() result["xx"] = data["x"] * data["x"] result["xxx"] = data["x"] * data["x"] * data["x"] return result else: result = {"xx": [data["x"][0] * data["x"][0]]} result["xxx"] = [data["x"][0] * data["x"][0] * data["x"][0]] return result @register_type_udf(Fractional) def square(input: Union[List, pd.Series]) -> Union[List, pd.Series]: return [x * x for x in input] @register_metric_udf(col_type=String) def upper(input: Any) -> Any: return input.upper() df = pd.DataFrame({ "mass": [1, 2, 3], "volume": [4, 5, 6], "score": [1.9, 4.2, 3.1], "lower": ["a", "b", "c"], "x": [1, 2, 3] }) schema = udf_schema() result = why.log(df, schema=schema) result.view().to_pandas() result.view().get_column("lower").get_metric("udf").to_summary_dict()["upper:frequent_items/frequent_strings"] new_df, _ = schema.apply_udfs(df) new_df _, new_row = schema.apply_udfs(row={"mass": 4, "volume": 7, "score": 2.0, "lower": "d"}) new_row