In [1]:

# Enable IPython's autoreload extension. Mode 2 re-imports all loaded modules
# before each cell executes, so edits to library source are picked up live.
%load_ext autoreload

%autoreload 2

# Put the directory two levels above the working directory on the import path.
# NOTE(review): presumably so the local yellowbrick checkout is importable
# from this notebook — confirm against the repository layout.
import sys
sys.path.append("./../..")

In [2]:

%reload_ext yellowbrick
%matplotlib inline
# Imports
import pandas as pd  
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

from sklearn.cross_validation import train_test_split  
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import precision_recall_curve  

from yellowbrick.style.palettes import get_color_cycle, PALETTES
from yellowbrick.style.colors import resolve_colors
from yellowbrick.base import ModelVisualizer
from yellowbrick.classifier import ThresholdVisualizer, thresholdviz

/usr/local/var/pyenv/versions/3.5.2/envs/yb-dev/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)

In [3]:

# Retrieve Data Set
# The file is read with header=None, so pandas assigns integer column labels;
# column 57 is renamed to 'is_spam' and used as the target below.
# The rename is chained (not in-place) so re-running the cell always rebuilds
# `df` from the freshly loaded data.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data',
    header=None,
).rename(columns={57: 'is_spam'})

# Build the classifier.
# alpha is passed by keyword: a bare positional 3 hides what is being set, and
# scikit-learn releases that make estimator constructor parameters
# keyword-only reject the positional form.
model = BernoulliNB(alpha=3)

# Split the frame into the feature matrix (every column except the target)
# and the target vector.
X = df.drop('is_spam', axis=1)
y = df['is_spam']

In [4]:

# Configure the threshold visualizer around the classifier built earlier:
# 100 trials, the 0.1 / 0.5 / 0.9 quantiles, and a custom plot title.
viz = ThresholdVisualizer(
    model,
    title="Spam vs Ham Thresholds",
    n_trials=100,
    quantiles=(0.1, 0.5, 0.9),
)

# fit_show is called with the full feature matrix and target; its return
# value is the cell's displayed output.
viz.fit_show(X, y)

Out[4]:

ThresholdVisualizer(model=None, n_trials=100, quantiles=(0.1, 0.5, 0.9),
          random_state=None, test_size_percent=0.1)

In [5]:

# Function-style call: pass the estimator, features and target directly to
# thresholdviz with no extra options. The recorded output shows it returns a
# matplotlib Axes.
# NOTE(review): presumably a shortcut for ThresholdVisualizer with default
# settings — confirm against the yellowbrick source.
thresholdviz(model, X, y)

Out[5]:

<matplotlib.axes._subplots.AxesSubplot at 0x108a70390>

In [ ]: