import altair
import attr
import cattr
import yaml
import enum
from IPython.display import display
from sklearn.neighbors import LocalOutlierFactor
from sklearn.datasets import load_iris, load_linnerud
@enum.unique
class Dataset(enum.Enum):
    """Datasets this script can analyse.

    Each member's value is the lowercase string spelling of the dataset
    name; ``enum.unique`` guarantees that no two members share a value.
    """

    # Loaded through sklearn.datasets.load_linnerud.
    LINNERUD = "linnerud"

    # Loaded through sklearn.datasets.load_iris.
    IRIS = "iris"
@attr.frozen
class ConfigSchema:
    """Immutable, typed view of the settings read from the YAML config file.

    Attributes:
        dataset: Which of the supported datasets to load.
        outlier_n: Passed to ``LocalOutlierFactor`` as its first argument
            (the number of neighbours).
        plot_x: Altair encoding used for the chart's x channel.
        plot_y: Altair encoding used for the chart's y channel.
    """

    dataset: Dataset
    outlier_n: int
    plot_x: str
    plot_y: str
# Read the run configuration and convert it into a typed ConfigSchema.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# platform's default text encoding.
with open("config_b.yaml", encoding="utf-8") as f:
    config = cattr.structure(yaml.safe_load(f), ConfigSchema)

# One scikit-learn loader per supported dataset.
_DATASET_LOADERS = {
    Dataset.LINNERUD: load_linnerud,
    Dataset.IRIS: load_iris,
}

try:
    _load_dataset = _DATASET_LOADERS[config.dataset]
except KeyError:
    # Only reachable if Dataset gains a member with no loader registered.
    raise ValueError(f"Unsupported dataset {config.dataset}") from None

# as_frame=True asks the loader for pandas objects; .data is the feature table.
data = _load_dataset(as_frame=True).data

# fit_predict returns one label per row; rows labelled -1 are the outliers.
_is_outlier = (
    LocalOutlierFactor(n_neighbors=config.outlier_n).fit_predict(data) == -1
)

# Attach the flags as a new column without modifying the loader's frame
# in place.
data = data.assign(Outlier=_is_outlier)

chart = (
    altair.Chart(data)
    .mark_point()
    .encode(
        x=config.plot_x,
        y=config.plot_y,
        color="Outlier",
    )
)

# A bare expression is rendered only when it is the last statement of a
# notebook cell; display() shows the chart regardless of where this code sits.
display(chart)