#!/usr/bin/env python
# coding: utf-8
"""Load the UCI census (adult) test split and show it in Facets Dive.

Notebook export: the first cell downloads the CSV into a DataFrame and
serializes it to a JSON string; the second cell injects that JSON into a
``<facets-dive>`` element and displays it in the notebook output.
"""

# In[1]:

# Load UCI census and convert to json for sending to the visualization
import pandas as pd

# Column names for the dataset; they are passed to read_csv via ``names``.
features = [
    "Age",
    "Workclass",
    "fnlwgt",
    "Education",
    "Education-Num",
    "Marital Status",
    "Occupation",
    "Relationship",
    "Race",
    "Sex",
    "Capital Gain",
    "Capital Loss",
    "Hours per week",
    "Country",
    "Target",
]

# Load dataframe from external CSV and add header information
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test",
    names=features,  # name features for header row
    sep=r'\s*,\s*',  # comma separator, swallowing the whitespace around it
    engine='python',  # regex separators need the python parsing engine
    skiprows=[0],  # skip first row without data
    na_values="?",  # treat "?" entries as missing values (NaN)
)

# Set the sprite_size based on the number of records in the dataset;
# larger datasets can crash the browser if the size is too large (>50000)
sprite_size = 32 if len(df.index) > 50000 else 64

# One JSON object per record, which is the form assigned to the element below.
jsonstr = df.to_json(orient='records')


# In[2]:

# Display the Dive visualization for this data
# Import from the public IPython.display module: importing ``display`` from
# IPython.core.display is deprecated and fails on recent IPython releases.
from IPython.display import display, HTML

# Create Facets template; the placeholders are filled by str.format below.
HTML_TEMPLATE = """<link rel="import" href="/nbextensions/facets-dist/facets-jupyter.html">
        <facets-dive sprite-image-width="{sprite_size}" sprite-image-height="{sprite_size}" id="elem" height="600"></facets-dive>
        <script>
          document.querySelector("#elem").data = {jsonstr};
        </script>"""

# Load the json dataset and the sprite_size into the template
html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)

# Display the template
display(HTML(html))


# In[ ]: