#!/usr/bin/env python
# coding: utf-8

# # Naive Bayes classifier
#
# Computing the posterior probability of $X$ belonging to class $c$ using **Bayes** rule:
#
# $$P(y_c|X) = \frac{P(X|y_c)P(y_c)}{P(X)}$$
#
# from the estimated prior probability of the class, $P(y_c)$, and the likelihood of $X$ given the class, $P(X|y_c)$.
#
# The assumption of this classifier is that the data is Gaussian: each feature is normally distributed within each class.
#
# The NaiveBayes classifier is implemented directly from the equations above. The source code is in the ***probabilistic*** library.

# ## Importing libraries

# In[1]:

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split

# ## Importing the NaiveBayes classifier from spkit

# In[2]:

from spkit.ml import NaiveBayes

# # Example 1: Iris dataset

# In[3]:

data = datasets.load_iris()
X = data.data
y = data.target
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(Xt.shape, yt.shape, Xs.shape, ys.shape)

# ## Fitting the model (estimating the parameters)

# In[4]:

clf = NaiveBayes()
clf.fit(Xt, yt)

# ## Prediction and accuracy

# In[5]:

ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy : ', np.mean(ytp == yt))
print('Testing Accuracy : ', np.mean(ysp == ys))

# ## Predicting probabilities

# In[6]:

ytpr = clf.predict_prob(Xt)
yspr = clf.predict_prob(Xs)

# In[7]:

ytpr[0]

# In[8]:

clf.predict(Xs[0]), clf.predict_prob(Xs[0])

# ## Parameters: $\mu$, $\sigma$

# In[9]:

clf.parameters

# ## Visualizing the distributions
# Setting the names of classes and features (optional)

# In[10]:

clf.set_class_labels(data['target_names'])
clf.set_feature_names(data['feature_names'])

# In[11]:

fig = plt.figure(figsize=(12, 10))
clf.VizPx()

# # Example 2: Breast Cancer dataset

# In[12]:

data = datasets.load_breast_cancer()
X = data.data
y = data.target
Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(Xt.shape, yt.shape, Xs.shape, ys.shape)

# ## Fitting the model (estimating the parameters)

# In[13]:

clf = NaiveBayes()
clf.fit(Xt, yt)

# ## Accuracy

# In[14]:

ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy : ', np.mean(ytp == yt))
print('Testing Accuracy : ', np.mean(ysp == ys))

# ## Parameters: $\mu$, $\sigma$

# In[15]:

clf.parameters[0]  # parameters of class 0

# In[16]:

clf.set_class_labels(data['target_names'])
#clf.set_feature_names(data['feature_names'])

# ## Visualizing the first 16 features

# In[17]:

fig = plt.figure(figsize=(12, 10))
clf.VizPx(nfeatures=range(16))

# ## Visualizing the remaining 14 features

# In[18]:

fig = plt.figure(figsize=(12, 10))
clf.VizPx(nfeatures=range(16, 30))

# # Example 3: Digit classification

# In[19]:

data = datasets.load_digits()
X = data.data
y = data.target

# Features with zero variance (constant value) could be excluded:
#X = X[:, X.var(0) > 0]

Xt, Xs, yt, ys = train_test_split(X, y, test_size=0.3)
print(Xt.shape, yt.shape, Xs.shape, ys.shape)

# ## Fitting the model (estimating the parameters)

# In[20]:

clf = NaiveBayes()
clf.fit(Xt, yt)

# ## Accuracy

# In[21]:

ytp = clf.predict(Xt)
ysp = clf.predict(Xs)
print('Training Accuracy : ', np.mean(ytp == yt))
print('Testing Accuracy : ', np.mean(ysp == ys))

# ## Predicting probabilities

# In[22]:

clf.predict(Xs[0]), clf.predict_prob(Xs[0])

# In[23]:

plt.imshow(Xs[0].reshape([8, 8]), cmap='gray')
plt.axis('off')
plt.show()
print('Prediction', clf.predict(Xs[0]))

# ## Visualizing

# In[24]:

fig = plt.figure(figsize=(12, 10))
clf.VizPx(nfeatures=range(5, 19))
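
# ## Sanity check: computing the posterior by hand
#
# A minimal sketch (not part of spkit) of the Gaussian naive Bayes computation
# from the equations above, using only numpy. The per-class mean, variance and
# prior are re-estimated here directly from the digits training split rather
# than read out of `clf`, since the internal layout of `clf.parameters` is an
# implementation detail. Up to numerical details such as variance smoothing,
# the result should agree with `clf.predict_prob`.

# In[25]:

def gaussian_nb_posterior(Xtr, ytr, x, var_floor=1e-9):
    """Posterior P(y_c|x) under per-feature Gaussian likelihoods.

    `var_floor` is an assumption made here: a small constant added to the
    variances to cope with zero-variance (constant) features.
    """
    classes = np.unique(ytr)
    log_post = []
    for c in classes:
        Xc = Xtr[ytr == c]
        mu = Xc.mean(axis=0)
        var = Xc.var(axis=0) + var_floor
        prior = Xc.shape[0] / Xtr.shape[0]            # P(y_c)
        # log P(x|y_c): sum of per-feature Gaussian log-densities
        log_lik = -0.5 * np.sum(np.log(2 * np.pi * var) + (x - mu) ** 2 / var)
        log_post.append(np.log(prior) + log_lik)      # log P(x|y_c) + log P(y_c)
    log_post = np.array(log_post)
    log_post -= log_post.max()                        # stabilize before exp
    post = np.exp(log_post)
    return classes, post / post.sum()                 # normalizing plays the role of P(X)

classes, post = gaussian_nb_posterior(Xt, yt, Xs[0])
print('Manual posterior :', np.round(post, 4))
print('Manual prediction:', classes[np.argmax(post)])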
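
# ## Cross-check against scikit-learn (optional)
#
# As a further sanity check, sklearn's `GaussianNB` fits the same per-class
# Gaussian model, so its accuracy on this split should be in the same ballpark
# as spkit's NaiveBayes above (exact values depend on the random split and on
# each library's variance smoothing).

# In[26]:

from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB().fit(Xt, yt)
print('sklearn GaussianNB testing accuracy :', gnb.score(Xs, ys))
# predict_proba expects a 2D array, hence Xs[:1] rather than Xs[0]
print('sklearn posterior for first test sample:', np.round(gnb.predict_proba(Xs[:1])[0], 4))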