#!/usr/bin/env python
# coding: utf-8
"""Example: profile pandas DataFrames with whylogs and read the results back.

Steps, one per notebook cell:

1. Load ``lending_club_1000.csv`` and split it 80/20 into training and test
   frames (fixed ``random_state`` so the split is reproducible).
2. Log both frames through a whylogs session.
3. Inspect the flat summary and one histogram of the test profile.
4. Reload the summary CSV and the protobuf profile from ``whylogs-output/``.
"""

# In[1]:


# Notebook magic: get_ipython() is not defined in this file, so this script
# expects to run under an IPython kernel that provides it.
get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


# In[2]:


# Just a simple convenience function to send the internal python
# logs to stdout. Definitely not required
from whylogs.logs import display_logging

display_logging('debug')


# ## Load data

# In[3]:


# Load some data
df = pd.read_csv('lending_club_1000.csv')

# Split into a test & training set: 80 % of the rows are sampled without
# replacement for training, every remaining row goes to the test set.
df_training = df.sample(int(len(df) * 0.8), replace=False, random_state=123)
df_test = df.drop(df_training.index)
df.head()


# ## Log dataset sketches

# In[4]:


from whylogs import get_or_create_session

session = get_or_create_session()


# #### Log dataframe

# In[5]:


session.log_dataframe(df_training, 'training.data')

# Then you could do whatever training or calculations you'd like


# ### Inspect profiles/statistics

# In[6]:


# You can also capture the logger response and interact with the generated
# profiles

# Log the test data
prof = session.log_dataframe(df_test, 'test.data')
summary = prof.flat_summary()
stats_df = summary['summary']
stats_df


# In[7]:


# See one of the inspected histograms
hist_data = summary['hist']['fico_range_high']
bins = hist_data['bin_edges']
n = hist_data['counts']
# There is one more edge than there are bins, so each bar starts at its left
# edge and is as wide as the gap to the next edge.
bin_width = np.diff(bins)
plt.bar(bins[0:-1], n, bin_width, align='edge')


# ## Load logged data

# In[8]:


import glob


def _sorted_matches(pattern):
    """Return the paths matching *pattern*, sorted by name.

    Raises:
        FileNotFoundError: if nothing matches, so the failure names the
            missing output instead of surfacing as a bare ``IndexError``
            when the caller picks the last element.
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(
            'no files match {!r}; log the dataset first'.format(pattern)
        )
    return matches


# ### Load flat table statistics

# In[9]:


# Load the flat table statistics from the 'test.data' dataset
fnames = _sorted_matches(
    'whylogs-output/test.data/dataset_summary/flat_table/dataset_summary*.csv'
)
# Load the most recent file.
# NOTE(review): this takes the last name in sort order, which is the newest
# file only if the file names sort chronologically -- confirm.
test_stats = pd.read_csv(fnames[-1])
test_stats


# ### Load the full dataset profile sketch

# In[10]:


from whylogs import DatasetProfile

# Load a dataset profile from the 'test.data' dataset
fnames = _sorted_matches('whylogs-output/test.data/dataset_profile/protobuf/*.bin')
test_prof = DatasetProfile.read_protobuf(fnames[-1], delimited_file=False)
test_prof


#
# ---

# In[11]:


# Not necessary, but you can reset the WhyLogs session if you want
from whylogs import reset_default_session

reset_default_session()


# In[ ]: