#!/usr/bin/env python
# coding: utf-8

# # Missing Values Visualizer Examples
#
# Script export of a notebook that demonstrates the two missing-value
# visualizers from ``yellowbrick.contrib.missing`` on the Horse Colic data
# set. Each visualizer is shown twice: once without a target and once with
# the target ``y`` passed to ``fit``.
#
# NOTE: the ``get_ipython()`` calls only resolve inside an IPython/Jupyter
# session; they are kept so the export stays faithful to the notebook.

# In[1]:

get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

import sys

# Make the repository checkout two directories up importable.
sys.path.append("./../..")

# In[2]:

get_ipython().run_line_magic('reload_ext', 'yellowbrick')
get_ipython().run_line_magic('matplotlib', 'inline')

# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.preprocessing import OneHotEncoder
from yellowbrick.contrib.missing import MissingValuesDispersion, MissingValuesBar
from sklearn.datasets import make_classification

# # Use the Horse Colic Data Set
#
# Contains natural missing values in data

# In[3]:

# Column names live in a local side file; its "Attribute" column is used below.
headers = pd.read_csv("./horse-colic.attrs")

# In[4]:

url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/horse-colic/horse-colic.data'

# Retrieve Data Set
# The column names are supplied separately from ``horse-colic.attrs``, so the
# data file is read with ``header=None``; otherwise pandas would consume the
# first record as a header row and that sample would be silently dropped.
# ``sep=r"\s+"`` is the non-deprecated spelling of ``delim_whitespace=True``.
df = pd.read_csv(url, sep=r"\s+", header=None)
df.columns = headers.Attribute.values

# Missing entries are encoded as "?" in the raw file; turn them into NaN so
# the visualizers can detect them.
df = df.replace(to_replace="?", value=np.nan)

# In[5]:

# Split the frame into the feature matrix and the 'cp_data' target column.
X = df.drop(columns=['cp_data'])
y = df['cp_data']

# # Using Missing Values Dispersion Chart

# In[6]:

classes = ['sick', 'healthy']

# ### No target y passed in, produces mono-color chart

# In[7]:

viz = MissingValuesDispersion(classes=classes)
viz.fit(X)
viz.show()

# ### Target y passed in, produces Dispersion chart with elements colored by target variable

# In[8]:

viz = MissingValuesDispersion(classes=classes)
viz.fit(X, y=y)
viz.show()

# # Using Missing Values Bar Chart
#
# ### No target y passed in, produces mono-color bar chart

# In[9]:

oz = MissingValuesBar(classes=classes)
oz.fit(X)
oz.show()

# ### Target y passed in, produces stacked bar chart

# In[10]:

oz = MissingValuesBar(classes=classes)
oz.fit(X, y=y)
oz.show()

# In[ ]: