#!/usr/bin/env python
# coding: utf-8
# # `popmon` introductory notebook
# This notebook contains examples of how to generate `popmon` reports from a pandas DataFrame.
# In[ ]:
# (optional) Adjust the jupyter notebook style for easier navigation of the reports
# `display` and `HTML` are imported from the public `IPython.display` module.
# Importing `display` from `IPython.core.display` has been deprecated since
# IPython 7.14 and is no longer possible on newer IPython releases.
from IPython.display import display, HTML
# Wider notebook
# NOTE(review): the HTML payload below is an empty string, so this call injects
# no styling - presumably the original <style> markup was lost on export; confirm.
display(HTML(""))
# Taller cells by default
# NOTE(review): empty payload here as well - restore the intended markup if available.
display(HTML(""))
# ## Setup `popmon` and load our dataset
# Install popmon (if not installed yet) in the current environment.
# In[ ]:
import sys
# Invoke pip as a module of the interpreter referenced by `sys.executable`, so
# the package is installed for that interpreter; `-q` reduces pip's output.
# NOTE: `get_ipython` is neither imported nor defined in this file - presumably
# it is injected by the IPython/Jupyter runtime, so this line only works there.
# The command is a plain (non-f) string: the `{sys.executable}` placeholder is
# left for `.system()` to expand (assumed IPython behavior - confirm).
get_ipython().system('"{sys.executable}" -m pip install -q popmon')
# Import pandas and popmon, load an example dataset provided by popmon, and show the first few rows.
# In[ ]:
import pandas as pd
import popmon
from popmon import resources
# In[ ]:
# Read the example CSV shipped with popmon's resources; the "date" column is
# parsed as dates while loading.
df = pd.read_csv(
    resources.data("test.csv.gz"),
    parse_dates=["date"],
)
# In[ ]:
# Preview the first rows of the dataset
df.head()
# ## Reporting given a pandas.DataFrame
# In[ ]:
report = df.pm_stability_report(
    # Restrict the report to three features, each given in "date:<column>" form
    features=["date:age", "date:isActive", "date:eyeColor"],
    # The 'date' column provides the time axis
    time_axis="date",
    # Group the data into batches of two weeks
    time_width="2w",
)
# In[ ]:
# Show the generated report
report
# ### Regenerate the report
# You can change the report parameters without having to rerun the computational part of the pipeline by using the `regenerate` method. For example, a short (limited) report is generated when the `extended_report` flag is set to `False`. To configure which statistics are shown, set the `show_stats` argument accordingly.
#
# Another option is to change the `plot_hist_n` parameter to control the number of histograms being displayed per feature.
# In[ ]:
# Re-render the existing report in its short form, with three histograms per feature
report.regenerate(
    plot_hist_n=3,
    extended_report=False,
)
# ## Reporting given histograms
# If the user would like to generate the report directly from histograms, then popmon also supports that.
# First, we generate histograms (but we could also load pre-generated histograms from a pickle or JSON file).
# In[ ]:
hists = df.pm_make_histograms(
    # Features to histogram, each given in "date:<column>" form
    features=["date:age", "date:gender", "date:isActive"],
    # Same time axis and two-week batching as used for the report above
    time_axis="date",
    time_width="2w",
)
# In[ ]:
# List the keys under which the histograms are stored
list(hists.keys())
# And then generate the report based on histograms:
# In[ ]:
# Build the report directly from the histograms held in `hists`; no other
# arguments are passed, so popmon's own defaults apply.
# This rebinds `report`, replacing any report object assigned to that name earlier.
report = popmon.stability_report(hists)
# In[ ]:
# Show the histogram-based report
report