#!/usr/bin/env python
# coding: utf-8

# Notebook export: plots classification-threshold visualizations for a
# Bernoulli naive Bayes model trained on the UCI spambase data set.
# The `get_ipython()` calls below only work inside an IPython/Jupyter session.

# In[1]:

get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

import sys

# Make the packages two directories up importable from this notebook.
sys.path.append("./../..")


# In[2]:

get_ipython().run_line_magic('reload_ext', 'yellowbrick')
get_ipython().run_line_magic('matplotlib', 'inline')

# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

# `sklearn.cross_validation` was removed from scikit-learn; the same helper
# lives in `sklearn.model_selection`. Fall back only for very old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import precision_recall_curve

from yellowbrick.style.palettes import get_color_cycle, PALETTES
from yellowbrick.style.colors import resolve_colors
from yellowbrick.base import ModelVisualizer
# NOTE(review): ThresholdVisualizer / thresholdviz (and `fit_show` below) are
# kept exactly as the original notebook used them -- confirm they exist in the
# installed yellowbrick version.
from yellowbrick.classifier import ThresholdVisualizer, thresholdviz


# In[3]:

# Retrieve Data Set
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data',
    header=None,
)
# Column 57 is used as the target; give it a readable name.
df.rename(columns={57: 'is_spam'}, inplace=True)

# Build the classifier and get the predictions
# `alpha` is passed by keyword: recent scikit-learn releases reject
# positional constructor arguments.
model = BernoulliNB(alpha=3)

# Every column except the target is a feature.
X = df[[col for col in df.columns if col != 'is_spam']]
y = df['is_spam']


# In[4]:

viz = ThresholdVisualizer(
    model,
    n_trials=100,
    title="Spam vs Ham Thresholds",
    quantiles=(0.10, 0.5, .9),
)
viz.fit_show(X, y)


# In[5]:

# Same visualization through the one-call helper, using its defaults.
thresholdviz(model, X, y)


# In[ ]:


# In[ ]:


# In[ ]: