#!/usr/bin/env python
# coding: utf-8

# # `popmon` introductory notebook
# This notebook contains examples of how to generate `popmon` reports from a pandas DataFrame.

# In[ ]:


# (optional) Adjust the jupyter notebook style for easier navigation of the reports
from IPython.display import HTML, display

# NOTE(review): the two stylesheet payloads below are reconstructed from the
# intent stated in the comments; confirm the selectors and sizes against the
# Jupyter frontend in use.
# Wider notebook
display(HTML("<style>.container { width:90% !important; }</style>"))
# Cells are higher by default
display(HTML("<style>div.output_scroll { height: 44em; }</style>"))


# ## Setup `popmon` and load our dataset
# Install popmon (if not installed yet) in the current environment.

# In[ ]:


import subprocess
import sys

# Invoke pip through the interpreter running this code so the package lands in
# the active environment. An argument list (no shell) avoids quoting problems
# with paths containing spaces, and works both inside and outside IPython.
# check=False keeps the install best-effort: a failure does not abort here,
# the `import popmon` below will surface it.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q", "popmon"],
    check=False,
)


# Import pandas and popmon, load an example dataset provided by popmon and show the first few results.

# In[ ]:


import pandas as pd

import popmon
from popmon import resources


# In[ ]:


df = pd.read_csv(resources.data("test.csv.gz"), parse_dates=["date"])


# In[ ]:


df.head()


# ## Reporting given a pandas.DataFrame

# In[ ]:


report = df.pm_stability_report(
    # Use the 'date' column as our time axis
    time_axis="date",
    # Create batches for every two weeks of data
    time_width="2w",
    # Select a subset of features
    features=["date:age", "date:isActive", "date:eyeColor"],
)


# In[ ]:


report


# ### Regenerate the report
# You can change the report parameters without having to rerun the computational part of the
# pipeline using the `regenerate` method. For example: a short (limited) report will be generated
# since the `extended_report` flag is set to `False`. If a user wants to configure which statistics
# they want to see, the `show_stats` argument has to be set accordingly.
#
# Another option is to change the `plot_hist_n` parameter to control the number of histograms
# being displayed per feature.

# In[ ]:


# NOTE(review): popmon is installed unpinned above; confirm that the installed
# release still accepts these options as direct keyword arguments.
report.regenerate(extended_report=False, plot_hist_n=3)


# ## Reporting given histograms
# If the user would like to generate the report directly from histograms, then popmon also supports that.
# First, we generate histograms (but we could load pre-generated histograms from a pickle or json file as well).

# In[ ]:


hists = df.pm_make_histograms(
    time_axis="date",
    time_width="2w",
    features=["date:age", "date:gender", "date:isActive"],
)


# In[ ]:


list(hists.keys())


# And then generate the report based on histograms:

# In[ ]:


report = popmon.stability_report(hists)


# In[ ]:


report