#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Load IPython's autoreload extension and switch it to mode 2 so edits to
# imported modules are picked up without restarting the kernel.
get_ipython().run_line_magic('load_ext', 'autoreload')

get_ipython().run_line_magic('autoreload', '2')

# Add the directory two levels up (relative to the current working directory)
# to the module search path.
# NOTE(review): presumably this makes a local yellowbrick checkout importable — confirm.
import sys
sys.path.append("./../..")


# In[2]:


# Notebook display setup.
# NOTE(review): `%reload_ext` expects an IPython extension module — confirm
# that yellowbrick actually provides one.
get_ipython().run_line_magic('reload_ext', 'yellowbrick')
get_ipython().run_line_magic('matplotlib', 'inline')

# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20. Prefer `model_selection`, and only fall back to the legacy module on
# installations too old to have it.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import precision_recall_curve

from yellowbrick.style.palettes import get_color_cycle, PALETTES
from yellowbrick.style.colors import resolve_colors
from yellowbrick.base import ModelVisualizer
from yellowbrick.classifier import ThresholdVisualizer, thresholdviz


# In[3]:


# Retrieve the data set. The file is read with header=None, so the columns get
# integer labels; column 57 is renamed to 'is_spam' and used as the target.
# The rename rebinds `df` rather than mutating in place.
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data', header=None)
df = df.rename(columns={57: 'is_spam'})

# Build the classifier (no predictions are made in this cell). `alpha` is
# passed by keyword: newer scikit-learn releases reject positional constructor
# arguments, and the keyword documents what the 3 means.
model = BernoulliNB(alpha=3)

# Features are every column except the target; the target is 'is_spam'.
X = df.drop('is_spam', axis=1)
y = df['is_spam']


# In[4]:


# Wrap the classifier in a ThresholdVisualizer configured with 100 trials,
# a custom title, and the 0.1 / 0.5 / 0.9 quantiles.
viz = ThresholdVisualizer(
    model,
    n_trials=100,
    title="Spam vs Ham Thresholds",
    quantiles=(0.1, 0.5, 0.9),
)
# Run the visualizer on the full feature matrix and target.
viz.fit_show(X, y)


# In[5]:


# Call the `thresholdviz` function imported from yellowbrick.classifier on the
# same model and data, passing no extra options.
# NOTE(review): `model` is the same instance already handed to the visualizer
# in the previous cell — confirm that reusing it here is intended.
thresholdviz(model, X, y)


# In[ ]:


# In[ ]:


# In[ ]: