#!/usr/bin/env python
# coding: utf-8

# In[190]:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import svm

sns.set(rc={'figure.figsize': (10, 7.5)})


# 1. This problem involves hyperplanes in two dimensions.
#
# (a) Sketch the hyperplane `1 + 3X1 − X2 = 0`. Indicate the set of points for which `1 + 3X1 − X2 > 0`, as well as the set of points for which `1 + 3X1 − X2 < 0`.

# In[171]:

# points on the hyperplane: X2 = 1 + 3*X1
x1 = np.linspace(0, 10, 2)
x2 = (3 * x1) + 1

# plot the hyperplane
sns.lineplot(x=x1, y=x2, color='tab:gray')

# plot the contours of 1 + 3*X1 - X2; the warm region is where the
# expression is positive, the cool region where it is negative
xx1 = np.linspace(x1.min(), x1.max(), 20)
xx2 = np.linspace(x2.min(), x2.max(), 20)
xx1, xx2 = np.meshgrid(xx1, xx2)
zz = (1 + (3 * xx1.ravel()) - xx2.ravel()).reshape(xx1.shape)
plt.contourf(xx1, xx2, zz, cmap='coolwarm')
plt.colorbar()


# (b) On the same plot, sketch the hyperplane `−2 + X1 + 2X2 = 0`. Indicate the set of points for which `−2 + X1 + 2X2 > 0`, as well as the set of points for which `−2 + X1 + 2X2 < 0`.

# In[174]:

# points on the hyperplane: X2 = 1 - X1/2
x1 = np.linspace(0, 10, 2)
x2 = 1 - (x1 / 2)

# plot the hyperplane
sns.lineplot(x=x1, y=x2, color='tab:gray')

# plot the contours of -2 + X1 + 2*X2 (warm positive, cool negative)
xx1 = np.linspace(x1.min(), x1.max(), 20)
xx2 = np.linspace(x2.min(), x2.max(), 20)
xx1, xx2 = np.meshgrid(xx1, xx2)
zz = (-2 + xx1.ravel() + (2 * xx2.ravel())).reshape(xx1.shape)
plt.contourf(xx1, xx2, zz, cmap='coolwarm')
plt.colorbar()


# 2. We have seen that in `p = 2` dimensions, a linear decision boundary takes the form `β0 + β1X1 + β2X2 = 0`. We now investigate a non-linear decision boundary.
#
# (a) Sketch the curve `(1+X1)^2 + (2−X2)^2 = 4`.
#
# (b) On your sketch, indicate the set of points for which `(1+X1)^2 + (2−X2)^2 > 4`, as well as the set of points for which `(1+X1)^2 + (2−X2)^2 ≤ 4`.
#
# (c) Suppose that a classifier assigns an observation to the blue class if `(1+X1)^2 + (2−X2)^2 > 4` and to the red class otherwise. To what class are the observations classified:
#
# `(0, 0)`: B (1 + 4 = 5 > 4)
#
# `(−1, 1)`: R (0 + 1 = 1 ≤ 4)
#
# `(2, 2)`: B (9 + 0 = 9 > 4)
#
# `(3, 8)`: B (16 + 36 = 52 > 4)
#
# (d) Argue that while the decision boundary in (c) is not linear in terms of `X1` and `X2`, it is linear in terms of `X1, X1^2, X2, and X2^2`. Expanding gives `(1+X1)^2 + (2−X2)^2 = X1^2 + X2^2 + 2X1 − 4X2 + 5`, which is linear in the expanded features.
#
# .. observe its construction in code below from a linear combination of `X1, X1^2, X2, and X2^2`.

# In[185]:

# evaluate the expression as a linear combination of the expanded
# features X1, X2, X1^2, X2^2, plus the constant terms 1^2 and 2^2
x1 = np.linspace(-5, 5, 100)
x2 = np.linspace(-3, 7, 100)
xx1, xx2 = np.meshgrid(x1, x2)

df = pd.DataFrame({'x1': xx1.ravel(),
                   'x2': xx2.ravel(),
                   'x1_sq': xx1.ravel() ** 2,
                   'x2_sq': xx2.ravel() ** 2,
                   'x1_i_sq': np.repeat(1, xx1.ravel().shape[0]),   # constant 1^2
                   'x2_i_sq': np.repeat(4, xx1.ravel().shape[0])})  # constant 2^2
coeff = pd.DataFrame([2, -4, 1, 1, 1, 1], index=df.columns)
zz = (df @ coeff).values.reshape(xx1.shape)

plt.contourf(x1, x2, zz, cmap='coolwarm', alpha=0.75)
plt.colorbar()
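# As a quick sanity check (an added sketch, using the `xx1`, `xx2`, and `zz` arrays already in scope; `zz_direct` is a new name introduced here), the linear combination of expanded features above should agree with the directly evaluated quadratic `(1 + X1)^2 + (2 − X2)^2` at every grid point.

# In[ ]:

# the expanded-feature linear form reproduces the quadratic exactly
zz_direct = (1 + xx1) ** 2 + (2 - xx2) ** 2
np.allclose(zz, zz_direct)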
# 3. Here we explore the maximal margin classifier on a toy data set.

# In[251]:

toy_df = pd.DataFrame({'obs': np.arange(1, 8),
                       'x1': [3, 2, 4, 1, 2, 4, 4],
                       'x2': [4, 2, 4, 4, 1, 3, 1],
                       'y': ['R', 'R', 'R', 'R', 'B', 'B', 'B']})


# (a) We are given `n = 7` observations in `p = 2` dimensions. For each observation, there is an associated class label. Sketch the observations.

# In[252]:

sns.scatterplot(x='x1', y='x2', hue='y', data=toy_df)


# (b) Sketch the optimal separating hyperplane, and provide the equation for this hyperplane (of the form (9.1)). It passes midway between the support vectors; up to scaling, its equation is `−0.5 + X1 − X2 = 0`.

# In[276]:

# obtain the hyperplane; a large C approximates the hard (maximal) margin
# (note: `10^6` is bitwise XOR in Python, i.e. 12, hence 1e6 here)
clf = svm.SVC(kernel='linear', C=1e6)
clf.fit(toy_df.drop(['y', 'obs'], axis=1), toy_df.y)

# obtain the coefficients
coefs = pd.DataFrame(clf.coef_, columns=['x1', 'x2'])
coefs['i'] = pd.Series(clf.intercept_)

# plot the hyperplane: X2 = (-β1*X1 - β0) / β2
x1 = np.linspace(0, 5, 2)
x2 = (-coefs['x1'][0] * x1 - coefs['i'][0]) / coefs['x2'][0]
sns.scatterplot(x='x1', y='x2', hue='y', data=toy_df)
sns.lineplot(x=x1, y=x2)


# (c) Describe the classification rule for the maximal margin classifier. It should be something along the lines of "Classify to Red if β0 + β1X1 + β2X2 > 0, and classify to Blue otherwise." Provide the values for β0, β1, and β2. The fitted coefficients below are approximately β1 = −2, β2 = 2 with intercept β0 = 1; rescaled, the rule reads "classify to Red if 0.5 − X1 + X2 > 0, and to Blue otherwise."

# In[277]:

coefs


# (d) On your sketch, indicate the margin for the maximal margin hyperplane.
#
# (e) Indicate the support vectors for the maximal margin classifier.

# In[292]:

# plot the points
sns.scatterplot(x='x1', y='x2', hue='y', data=toy_df)

# plot the hyperplane
x1 = np.linspace(0.5, 5, 2)
x2 = (-coefs['x1'][0] * x1 - coefs['i'][0]) / coefs['x2'][0]
sns.lineplot(x=x1, y=x2, color='tab:gray')

# plot the contours of the decision function
x1 = np.linspace(0, 5, 50)
x2 = np.linspace(0, 5, 50)
xx1, xx2 = np.meshgrid(x1, x2)
df = pd.DataFrame({'i': np.repeat(1, xx1.ravel().shape[0]),
                   'x1': xx1.ravel(),
                   'x2': xx2.ravel()})
zz = df @ coefs.T
plt.contourf(xx1, xx2, zz.values.reshape(xx1.shape), cmap='coolwarm', alpha=0.5)
plt.colorbar()

# mark the support vectors
sns.scatterplot(x=clf.support_vectors_[:, 0],
                y=clf.support_vectors_[:, 1],
                color='black', marker='+', s=500)


# (f) Argue that a slight movement of the seventh observation would not affect the maximal margin hyperplane. Observation 7 is not a support vector, so any small movement that keeps it outside the margin leaves the hyperplane unchanged.

# In[317]:

# move the 7th point from (4, 1) to (3.5, 1.5), still outside the margin
toy_df_2 = pd.DataFrame({'obs': np.arange(1, 8),
                         'x1': [3, 2, 4, 1, 2, 4, 3.5],
                         'x2': [4, 2, 4, 4, 1, 3, 1.5],
                         'y': ['R', 'R', 'R', 'R', 'B', 'B', 'B']})

# plot the points
sns.scatterplot(x='x1', y='x2', hue='y', data=toy_df_2)

# obtain the hyperplane
clf = svm.SVC(kernel='linear', C=1e6)
clf.fit(toy_df_2.drop(['y', 'obs'], axis=1), toy_df_2.y)

# obtain the coefficients
coefs = pd.DataFrame(clf.coef_, columns=['x1', 'x2'])
coefs['i'] = pd.Series(clf.intercept_)

# plot the hyperplane
x1 = np.linspace(0.5, 5, 2)
x2 = (-coefs['x1'][0] * x1 - coefs['i'][0]) / coefs['x2'][0]
sns.lineplot(x=x1, y=x2, color='tab:gray')

# plot the contours
x1 = np.linspace(0, 5, 50)
x2 = np.linspace(0, 5, 50)
xx1, xx2 = np.meshgrid(x1, x2)
df = pd.DataFrame({'i': np.repeat(1, xx1.ravel().shape[0]),
                   'x1': xx1.ravel(),
                   'x2': xx2.ravel()})
zz = df @ coefs.T
plt.contourf(xx1, xx2, zz.values.reshape(xx1.shape), cmap='coolwarm', alpha=0.5)
plt.colorbar()

# mark the support vectors
sns.scatterplot(x=clf.support_vectors_[:, 0],
                y=clf.support_vectors_[:, 1],
                color='black', marker='+', s=500)
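# To make (f) concrete, the cell below (an added sketch; `clf_orig` and `clf_moved` are names introduced here) refits both data sets and prints the fitted coefficients; because observation 7 stays outside the margin in both cases, they should agree up to the solver's numerical tolerance.

# In[ ]:

# refit on the original and the moved data; the hyperplanes should coincide
clf_orig = svm.SVC(kernel='linear', C=1e6).fit(toy_df.drop(['y', 'obs'], axis=1), toy_df.y)
clf_moved = svm.SVC(kernel='linear', C=1e6).fit(toy_df_2.drop(['y', 'obs'], axis=1), toy_df_2.y)
print(clf_orig.coef_, clf_orig.intercept_)
print(clf_moved.coef_, clf_moved.intercept_)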
# (g) Sketch a hyperplane that is not the optimal separating hyperplane, and provide the equation for this hyperplane. The orange line below is `−1.75X1 + 2X2 = 0`; it separates the two classes, but with a smaller margin than the optimal hyperplane.

# In[311]:

# plot the points
sns.scatterplot(x='x1', y='x2', hue='y', data=toy_df)

# plot the optimal hyperplane (same plotting code as in (e))
x1 = np.linspace(0.5, 5, 2)
x2 = (-coefs['x1'][0] * x1 - coefs['i'][0]) / coefs['x2'][0]
sns.lineplot(x=x1, y=x2, color='tab:gray')

# plot the contours
x1 = np.linspace(0, 5, 50)
x2 = np.linspace(0, 5, 50)
xx1, xx2 = np.meshgrid(x1, x2)
df = pd.DataFrame({'i': np.repeat(1, xx1.ravel().shape[0]),
                   'x1': xx1.ravel(),
                   'x2': xx2.ravel()})
zz = df @ coefs.T
plt.contourf(xx1, xx2, zz.values.reshape(xx1.shape), cmap='coolwarm', alpha=0.5)
plt.colorbar()

# mark the support vectors
sns.scatterplot(x=clf.support_vectors_[:, 0],
                y=clf.support_vectors_[:, 1],
                color='black', marker='+', s=500)

# plot another hyperplane that is not optimal:
# -1.75*X1 + 2*X2 = 0, i.e. X2 = 0.875*X1
b1 = 1.75
b2 = 2
i = 0
x1 = np.linspace(0, 5, 2)
x2 = (b1 * x1 - i) / b2
sns.lineplot(x=x1, y=x2, color='tab:orange')


# (h) Draw an additional observation on the plot so that the two classes are no longer separable by a hyperplane.

# In[320]:

# add an 8th observation: a blue point at (3, 3), which lies on the segment
# between the red points (2, 2) and (4, 4), so no line separates the classes
toy_df_2 = pd.DataFrame({'obs': np.arange(1, 9),
                         'x1': [3, 2, 4, 1, 2, 4, 4, 3],
                         'x2': [4, 2, 4, 4, 1, 3, 1, 3],
                         'y': ['R', 'R', 'R', 'R', 'B', 'B', 'B', 'B']})

# plot the points
sns.scatterplot(x='x1', y='x2', hue='y', data=toy_df_2)

# fit the (now necessarily soft-margin) classifier
clf = svm.SVC(kernel='linear', C=1e6)
clf.fit(toy_df_2.drop(['y', 'obs'], axis=1), toy_df_2.y)

# obtain the coefficients
coefs = pd.DataFrame(clf.coef_, columns=['x1', 'x2'])
coefs['i'] = pd.Series(clf.intercept_)

# plot the hyperplane
x1 = np.linspace(0.5, 5, 2)
x2 = (-coefs['x1'][0] * x1 - coefs['i'][0]) / coefs['x2'][0]
sns.lineplot(x=x1, y=x2, color='tab:gray')

# plot the contours
x1 = np.linspace(0, 5, 50)
x2 = np.linspace(0, 5, 50)
xx1, xx2 = np.meshgrid(x1, x2)
df = pd.DataFrame({'i': np.repeat(1, xx1.ravel().shape[0]),
                   'x1': xx1.ravel(),
                   'x2': xx2.ravel()})
zz = df @ coefs.T
plt.contourf(xx1, xx2, zz.values.reshape(xx1.shape), cmap='coolwarm', alpha=0.5)
plt.colorbar()
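# A quick added check that the classes are genuinely non-separable: a separating hard-margin fit would give every functional margin `y_i * f(x_i)` a value of at least 1, and with C this large the fit is effectively hard-margin, so a minimum below 1 indicates that no separating hyperplane exists. (sklearn sorts the labels as `['B', 'R']`, so the decision function is positive on the Red side.)

# In[ ]:

# functional margins under the fitted classifier
X = toy_df_2.drop(['y', 'obs'], axis=1)
y_signed = np.where(toy_df_2.y == 'R', 1, -1)
margins = y_signed * clf.decision_function(X)
print('training accuracy:', (clf.predict(X) == toy_df_2.y).mean())
print('smallest functional margin:', margins.min())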