#!/usr/bin/env python
# coding: utf-8
# # Naive Bayes classifier
# Computing the posterior probability of $X$ belonging to class $c$ using **Bayes** rule
# $$ P(y_c|X)= \frac{P(X|y_c)P(y_c)}{P(X)}$$
# from the estimated prior probability of each class,
# $$P(y_c)$$
# and the likelihood of $X$ given the class,
# $$P(X|y_c)$$
#
# The *naive* assumption is that the features are conditionally independent given the class, so the likelihood factorizes as
# $$P(X|y_c) = \prod_{j} P(x_j|y_c)$$
# and this classifier further assumes each feature is Gaussian within each class:
# $$P(x_j|y_c) = \mathcal{N}(x_j;\, \mu_{jc},\, \sigma_{jc}^2)$$
#
# The implementation of the NaiveBayes classifier follows the above equations directly; the source code is part of the ***spkit*** library.
# ## Importing libraries
# In[1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
# ## Importing the NaiveBayes classifier from spkit
# In[2]:
from spkit.ml import NaiveBayes
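# Before applying it to data, here is a minimal NumPy sketch of how the posterior from the equations above can be computed for a single sample once the per-class priors, means, and variances are known. The numbers are assumed values for illustration only; this is not spkit's internal code:
# In[ ]:
# assumed Gaussian parameters for 2 classes x 3 features (illustration only)
mu    = np.array([[0.0, 1.0, 2.0],    # class 0: per-feature means
                  [1.0, 0.0, 3.0]])   # class 1
var   = np.array([[1.0, 0.5, 2.0],
                  [1.5, 1.0, 0.5]])   # per-class, per-feature variances
prior = np.array([0.5, 0.5])          # P(y_c)

x = np.array([0.5, 0.5, 2.5])         # a single sample

# log P(X|y_c): the product of per-feature Gaussians becomes a sum in log-space
log_lik  = (-0.5*np.log(2*np.pi*var) - (x - mu)**2/(2*var)).sum(axis=1)
log_post = np.log(prior) + log_lik    # log P(y_c) + log P(X|y_c)

# normalize so the posteriors sum to 1: P(y_c|X) = P(X|y_c)P(y_c)/P(X)
post = np.exp(log_post - log_post.max())
post /= post.sum()
print('P(y_c|X) =', post.round(3), '-> predicted class:', post.argmax())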
# # Example 1: Iris data set
# In[3]:
data = datasets.load_iris()
X = data.data
y = data.target
Xt,Xs,yt,ys = train_test_split(X,y,test_size=0.3)
print(Xt.shape,yt.shape,Xs.shape,ys.shape)
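# Note: `train_test_split` shuffles the data, so without a fixed `random_state` the split (and hence the accuracies reported below) will vary from run to run.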
# ## Fitting model (estimating the parameters)
# In[4]:
clf = NaiveBayes()
clf.fit(Xt,yt)
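# Under the hood, fitting a Gaussian naive Bayes model reduces to estimating, for each class, its prior and the per-feature mean and variance. A minimal illustrative sketch of that estimation (not spkit's internal code):
# In[ ]:
# per-class prior, mean, and variance estimated from the training data
for c in np.unique(yt):
    Xc = Xt[yt == c]
    print('class', c, '| prior =', round(len(Xc)/len(Xt), 3),
          '| mu[:2] =', Xc.mean(axis=0)[:2].round(2),
          '| var[:2] =', Xc.var(axis=0)[:2].round(2))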
# ## Prediction and Accuracy
# In[5]:
ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy : ',np.mean(ytp==yt))
print('Testing Accuracy : ',np.mean(ysp==ys))
# ## Predicting probabilities
# In[6]:
ytpr = clf.predict_prob(Xt)
yspr = clf.predict_prob(Xs)
# In[7]:
ytpr[0]
# In[8]:
clf.predict(Xs[0]), clf.predict_prob(Xs[0])
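# If `predict_prob` returns one probability per class, as the outputs above suggest, each row should sum to 1. A quick sanity check (not from the original):
# In[ ]:
print(ytpr.sum(axis=1)[:5])   # each entry should be (numerically) 1.0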
# ## Parameters :: $\mu$, $\sigma$
# In[9]:
clf.parameters
# ## Visualizing the distribution
# Setting the names of classes and features (Optional)
# In[10]:
clf.set_class_labels(data['target_names'])
clf.set_feature_names(data['feature_names'])
# In[11]:
fig = plt.figure(figsize=(12,10))
clf.VizPx()
# # Example 2: Breast Cancer
# In[12]:
data = datasets.load_breast_cancer()
X = data.data
y = data.target
Xt,Xs,yt,ys = train_test_split(X,y,test_size=0.3)
print(Xt.shape,yt.shape,Xs.shape,ys.shape)
# ## Fitting model (estimating the parameters)
# In[13]:
clf = NaiveBayes()
clf.fit(Xt,yt)
# ## Accuracy
# In[14]:
ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy : ',np.mean(ytp==yt))
print('Testing Accuracy : ',np.mean(ysp==ys))
# ## Parameters :: $\mu$, $\sigma$
# In[15]:
clf.parameters[0] # class 0
# In[16]:
clf.set_class_labels(data['target_names'])
#clf.set_feature_names(data['feature_names'])
# ## Visualizing the first 16 features
# In[17]:
fig = plt.figure(figsize=(12,10))
clf.VizPx(nfeatures=range(16))
# ## Visualizing the next 14 features
# In[18]:
fig = plt.figure(figsize=(12,10))
clf.VizPx(nfeatures=range(16,30))
# # Example 3: Digit Classification
# In[19]:
data = datasets.load_digits()
X = data.data
y = data.target
# Avoiding features with zero variance (constant values)
#X = X[:,X.var(0)>0]
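# Illustrative check (not in the original): count zero-variance features.
# For the 8x8 digits, border pixels can be identically zero; sigma = 0 makes
# the Gaussian density degenerate, which is why such features may need removing.
print('constant features:', int((X.var(axis=0) == 0).sum()), 'of', X.shape[1])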
Xt,Xs,yt,ys = train_test_split(X,y,test_size=0.3)
print(Xt.shape,yt.shape,Xs.shape,ys.shape)
# ## Fitting model (estimating the parameters)
# In[20]:
clf = NaiveBayes()
clf.fit(Xt,yt)
# ## Accuracy
# In[21]:
ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy : ',np.mean(ytp==yt))
print('Testing Accuracy : ',np.mean(ysp==ys))
# ## Predicting probabilities
# In[22]:
clf.predict(Xs[0]), clf.predict_prob(Xs[0])
# In[23]:
plt.imshow(Xs[0].reshape([8,8]),cmap='gray')
plt.axis('off')
plt.show()
print('Prediction',clf.predict(Xs[0]))
# ## Visualizing the distributions
# In[24]:
fig = plt.figure(figsize=(12,10))
clf.VizPx(nfeatures=range(5,19))