%load_ext autoreload
%autoreload 2
import sys
sys.path.append("./../..")
%reload_ext yellowbrick
%matplotlib inline
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.preprocessing import OneHotEncoder
from yellowbrick.contrib.missing import MissingValuesDispersion, MissingValuesBar
from sklearn.datasets import make_classification
The horse colic dataset contains naturally occurring missing values.
# Load the horse-colic data set. Column names come from a local attribute
# file; the measurements themselves are fetched from the UCI repository.
headers = pd.read_csv("./horse-colic.attrs")
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/horse-colic/horse-colic.data'

# Retrieve Data Set
# The raw file is whitespace-delimited and carries no header row, so
# header=None is required: without it pandas consumes the first sample as
# column labels, and that sample is silently lost once the labels are
# overwritten on the next line. sep=r"\s+" is the non-deprecated
# equivalent of delim_whitespace=True.
df = pd.read_csv(url, sep=r"\s+", header=None)
df.columns = headers.Attribute.values

# The file marks unrecorded measurements with "?"; turn them into NaN so
# they are treated as missing values.
df = df.replace(to_replace="?", value=np.nan)

# Split into the feature frame and the 'cp_data' column used as the target.
X = df.drop(columns=["cp_data"])
y = df["cp_data"]
# Class labels handed to the visualizers in this notebook.
classes = ["sick", "healthy"]

# Dispersion view fitted on the features alone; no target vector is passed,
# which is presumably why the legend warning is printed after this cell.
viz = MissingValuesDispersion(classes=classes)
viz.fit(X)
viz.show()
No handles with labels found to put in legend.
# Same dispersion visualizer, this time fitted with the target vector too.
viz = MissingValuesDispersion(classes=classes)
viz.fit(X, y)
viz.show()
# Fit the MissingValuesBar visualizer on the features only (no target
# vector is supplied) and render it.
oz = MissingValuesBar(classes=classes)
oz.fit(X)
oz.show()
No handles with labels found to put in legend.
# MissingValuesBar again, now fitted with both the features and the target.
oz = MissingValuesBar(classes=classes)
oz.fit(X, y)
oz.show()