#!/usr/bin/env python # coding: utf-8 # ### Lab 3: From binary classification to multiclass classification # In this demo, we cover binary classication through the OLS criterion. we consider both the K class discriminant as well as the one vs one and one vs rest classifiers. For each, we plot the decision bounndary using the meshgrid function from numpy. # #### Part I. K class discriminant # # The first and less ambiguous approach is to consider a single K classes discriminant. In this case, the first step is to encode the target using a binary representation $t = [0,1,0\ldots, 0]$ with $t^{(i)}_k = 1$ if $x^{(i)}$ is in class $k$. We can then solve the normal equations. # In[2]: from sklearn.datasets import make_blobs import numpy as np X, y = make_blobs(n_samples=30, centers=3, n_features=2) import matplotlib.pyplot as plt #plt.scatter(X[:,0], X[:,1], c=) print(y) from sklearn.preprocessing import OneHotEncoder enc = OneHotEncoder() enc.fit(y.reshape(-1,1)) T = enc.transform(y.reshape(-1,1)).toarray() T = np.array(T) # In[3]: np.shape(np.array(T)) # In[4]: import numpy as np from numpy.linalg import inv Xtilde = np.hstack((np.ones((np.shape(X)[0],1)), X)) XX = np.matmul(Xtilde.T, Xtilde) XT = np.matmul(Xtilde.T, T) Beta = np.matmul(inv(XX), XT) print(Beta) # In[5]: plt.scatter(X[:,0], X[:,1], c = y) plt.show() # In[6]: import numpy as np x1min = np.min(X[:,0]) x1max = np.max(X[:,0]) x2min = np.min(X[:,1]) x2max = np.max(X[:,1]) xx1, xx2 = np.meshgrid(np.linspace(x1min, x1max, 100), np.linspace(x2min, x2max, 100)) Xpredict = np.vstack((xx1.flatten(), xx2.flatten())).T XtildePredict = np.hstack((np.ones((np.shape(Xpredict)[0],1)), Xpredict)) prediction = np.matmul(Beta.T,XtildePredict.T) print(np.shape(prediction)) predictedTargets = np.zeros((len(xx1.flatten()), 1)) for i in range(len(xx1.flatten())): predictedTargets[i] = np.argmax(prediction[:,i]) plt.scatter(X[:,0], X[:,1], c = y) plt.contourf(xx1, xx2, np.reshape(predictedTargets,np.shape(xx1)), alpha = .3) plt.show() # #### Part II: One vs One # # In this case, we need to learn K(K-1)/2 = 3 classifiers. The classification is then made through a majority vote. Some ambiguity might remain when the maximum number of votes is shared equally among multiple classes. # In[7]: # one vs one num_classes = max(y)+1 from __future__ import division num_classifiers = int(num_classes*(num_classes-1)/2) ## generating the grid for the final display x1min = np.min(X[:,0]) x1max = np.max(X[:,0]) x2min = np.min(X[:,1]) x2max = np.max(X[:,1]) xx1, xx2 = np.meshgrid(np.linspace(x1min, x1max, 100), np.linspace(x2min, x2max, 100)) Xpredict = np.vstack((xx1.flatten(), xx2.flatten())).T Xtilde_predict = np.hstack((np.ones((np.shape(Xpredict)[0],1)), Xpredict)) PredictionMatrix = np.zeros((len(xx1.flatten()),num_classifiers)) counter = 0 for i in range(num_classes): for j in range(num_classes): if j> i: print('('+str(i)+str(j)+')') indices_i = np.squeeze(np.where(y==i)) indices_j = np.squeeze(np.where(y==j)) points_classi = Xtilde[indices_i,:] points_classj = Xtilde[indices_j,:] Xtilde_ij = np.vstack((points_classi, points_classj)) target_i = np.ones((len(indices_i),1)) target_j = np.zeros((len(indices_j),1)) target_ij = np.vstack((target_i, target_j)) # learning the plane XX = np.matmul(Xtilde_ij.T, Xtilde_ij) XT = np.matmul(Xtilde_ij.T, target_ij) beta_ij = np.matmul(inv(XX), XT) prediction_ij = np.matmul(XtildePredict, beta_ij) target_final_ij = np.zeros((len(prediction_ij),1)) Test_indices_i = np.squeeze(np.where(prediction_ij>0.5)) target_final_ij[Test_indices_i] = i Test_indices_j = np.squeeze(np.where(prediction_ij<=0.5)) target_final_ij[Test_indices_j] = j PredictionMatrix[:,counter] = np.squeeze(target_final_ij) counter +=1 # In[37]: print(PredictionMatrix) # To get the final classification, we now use a majority vote. Relying on the classes stored in 'PredictionMatrix' we count the number of times each sample is classified in a particular class. we then associate the most represented class to each point. In the case of a draw we illustrate the result with a fourth class. # In[42]: final_class = np.zeros((np.shape(PredictionMatrix)[0],1)) for i in np.arange(np.shape(PredictionMatrix)[0]): count = np.zeros((3,1)) for j in np.arange(3): tmp1 = np.where(PredictionMatrix[i,:]== j) numVotesForj = len(tmp1[0]) count[j] = numVotesForj maxVotes = np.max(count) tmp2 = np.where(count == maxVotes) Is_Vote_ambiguous = len(tmp2[0])>1 if not Is_Vote_ambiguous: final_class[i] = np.argmax(count) else: final_class[i] = 3 print('decision is ambiguous') # In[43]: # displaying the result plt.scatter(X[:,0], X[:,1], c = y) plt.contourf(xx1, xx2, np.reshape(final_class,np.shape(xx1)), alpha = .3) plt.show() # #### Part III: One vs rest # # Here we need to compute N-1 classifier for each of the K-1 vs rest classes. The last (i.e. K) class is defined from the points that haven't been put in any of the K-1 previous classes. The only difference with the one vs one classifier lies in the relabelling of the samples at each step. # In[63]: # one vs one num_classes = max(y)+1 from __future__ import division num_classifiers = int(num_classes*(num_classes-1)/2) ## generating the grid for the final display x1min = np.min(X[:,0]) x1max = np.max(X[:,0]) x2min = np.min(X[:,1]) x2max = np.max(X[:,1]) xx1, xx2 = np.meshgrid(np.linspace(x1min, x1max, 100), np.linspace(x2min, x2max, 100)) Xpredict = np.vstack((xx1.flatten(), xx2.flatten())).T Xtilde_predict = np.hstack((np.ones((np.shape(Xpredict)[0],1)), Xpredict)) prediction_final = 2*np.ones((len(xx1.flatten()),1)) indices_Allclasses = np.zeros((len(xx1.flatten()),num_classes-1)) counter = 0 for i in np.arange(num_classes-1): print('('+str(i)+')') indices_i = np.squeeze(np.where(y==i)) indices = np.zeros((len(y),1)) indices[indices_i] = 1 points_classi = Xtilde[np.where(indices==1)[0],:] points_rest = Xtilde[np.where(indices==0)[0],:] Xtilde_total = np.vstack((points_classi, points_rest)) target_i = np.ones((len(np.where(indices==1)[0]),1)) target_rest = np.zeros((len(np.where(indices==0)[0]),1)) target_total = np.vstack((target_i, target_rest)) # learning the plane XX = np.matmul(Xtilde_total.T, Xtilde_total) XT = np.matmul(Xtilde_total.T, target_total) beta_i = np.matmul(inv(XX), XT) prediction_i = np.matmul(XtildePredict, beta_i) indices_classi = np.where(prediction_i>0.5)[0] # checking for possible ambiguity indices_Allclasses[indices_classi,i] = 1 prediction_final[indices_classi] = i # In[69]: print(ambiguous_indices) # In[72]: # displaying ambiguous classifications ambiguous_indices = np.where(np.sum(indices_Allclasses,axis=1)==2)[0] prediction_final[ambiguous_indices]=-1 # In[73]: # displaying the result, and highlighting ambiguities plt.scatter(X[:,0], X[:,1], c = y) plt.contourf(xx1, xx2, np.reshape(prediction_final,np.shape(xx1)), alpha = .3) plt.show()