"""Mean-based estimation and k-means clustering on synthetic 2-D data.

The script shows, in order:

1. one Gaussian blob with its sample mean;
2. a mixture of three blobs with the per-blob sample means;
3. the single blob again, with horizontal lines at fixed second coordinates
   and a marker at (first coordinate of the mean, that second coordinate);
4. the same idea on the three-blob mixture;
5. the unlabelled mixture;
6. a hand-written k-means run, one subplot per iteration;
7. the scikit-learn k-means result on the same data.
"""

import matplotlib.pyplot as plt
import numpy as np
import sklearn.cluster as skc

# Every figure uses the same square viewport.
AXIS_LIMITS = [-6, 6]


def _make_single_cluster(seed=370):
    """Seed the global NumPy RNG and return a (2, 100) sheared, shifted blob."""
    np.random.seed(seed)
    pts = np.random.randn(2, 100)
    pts[0, :] += 0.2 * pts[1, :]
    pts += 2 * np.random.rand(2, 1)
    return pts


def _make_three_clusters(seed=370):
    """Seed the global NumPy RNG and return three blobs of 100, 50 and 200 points.

    The draw order (blob 1, its shift, blob 2, blob 3, its shift) is part of
    the contract: changing it changes the generated samples.
    """
    np.random.seed(seed)

    first = np.random.randn(2, 100)
    first[0, :] += 0.2 * first[1, :]
    first += 2 * np.random.rand(2, 1)

    second = np.random.randn(2, 50)
    second *= 0.6
    second += np.array([[-1], [3]])

    third = np.random.randn(2, 200)
    third[0, :] -= third[1, :]
    third += -3 * np.random.rand(2, 1)

    return first, second, third


def _plot_data(points):
    """Scatter all columns of a (2, n) array as semi-transparent dots."""
    plt.plot(points[0, :], points[1, :], 'o', alpha=0.5, label='data')


def _plot_mean(mean, label):
    """Draw a large marker at a 2-vector `mean` with the given legend label."""
    plt.plot(mean[0], mean[1], 'o', label=label, markersize=10)


def _mark_partial_observation(x_2, x_1_estimate):
    """Draw the line of points whose second coordinate is `x_2` and mark the
    point (x_1_estimate, x_2) on it, labelled as the estimate x-hat."""
    plt.axhline(x_2, color='r')
    plt.text(6.1, x_2, fr'$x=( ?, {x_2} )$', fontsize=12)
    plt.plot(x_1_estimate, x_2, 'o',
             label=fr'$\hat x=({x_1_estimate:4.2f},{x_2})$', markersize=10)


def _plot_clusters(points, labels, centroids):
    """Plot each cluster's points followed by its centroid.

    Data and centroid calls are interleaved per cluster so the colour cycle
    advances the same way for every figure that uses this helper.
    """
    for j in range(centroids.shape[1]):
        members = labels == j
        plt.plot(points[0, members], points[1, members], 'o', alpha=0.5)
        plt.plot(centroids[0, j], centroids[1, j], 'o', markersize=10)


def _finish_axes(*, legend=True, title=None):
    """Apply the shared viewport, optional legend/title, and switch the grid on."""
    plt.xlim(AXIS_LIMITS)
    plt.ylim(AXIS_LIMITS)
    if legend:
        plt.legend()
    if title is not None:
        plt.title(title)
    plt.grid()


def _assign_clusters(points, centroids):
    """Return, for each column of `points`, the index of the nearest centroid
    (Euclidean distance)."""
    distances = np.stack([
        np.linalg.norm(points - centroids[:, [j]], axis=0)
        for j in range(centroids.shape[1])
    ])
    return np.argmin(distances, axis=0)


def _update_centroids(points, labels, centroids):
    """Move each centroid, in place, to the mean of the points assigned to it.

    A centroid with no assigned points is left where it is: taking the mean of
    an empty selection would yield NaN, and a NaN centroid would then win every
    subsequent `argmin` and swallow all points.
    """
    for j in range(centroids.shape[1]):
        members = points[:, labels == j]
        if members.shape[1]:
            centroids[:, j] = members.mean(axis=1)


# --- Figure 1: one blob and its sample mean ---------------------------------
x = _make_single_cluster()
x_mean = np.mean(x, axis=1)

plt.figure(figsize=(6, 6))
_plot_data(x)
_plot_mean(x_mean, r'$\theta$')
_finish_axes()
plt.show()

# --- Figure 2: three blobs and their sample means ---------------------------
x1, x2, x3 = _make_three_clusters()
x = np.hstack((x1, x2, x3))
x1_mean = np.mean(x1, axis=1)
x2_mean = np.mean(x2, axis=1)
x3_mean = np.mean(x3, axis=1)

plt.figure(figsize=(6, 6))
_plot_data(x)
_plot_mean(x1_mean, r'$\theta_1$')
_plot_mean(x2_mean, r'$\theta_2$')
_plot_mean(x3_mean, r'$\theta_3$')
_finish_axes()
plt.show()

# --- Figure 3: single blob, marking (mean[0], x_2) for two given x_2 --------
# Same seed and draw sequence as Figure 1, so this is the identical sample.
x = _make_single_cluster()
x_mean = np.mean(x, axis=1)

plt.figure(figsize=(6, 6))
_plot_data(x)
_plot_mean(x_mean, r'$\theta$')
_mark_partial_observation(3, x_mean[0])
_mark_partial_observation(-1, x_mean[0])
_finish_axes()
plt.show()

print(f'theta:{x_mean}')

# --- Figure 4: three blobs, marking (cluster mean[0], x_2) ------------------
# Same seed and draw sequence as Figure 2, so this is the identical sample.
x1, x2, x3 = _make_three_clusters()
x = np.hstack((x1, x2, x3))
x1_mean = np.mean(x1, axis=1)
x2_mean = np.mean(x2, axis=1)
x3_mean = np.mean(x3, axis=1)

plt.figure(figsize=(6, 6))
_plot_data(x)
_plot_mean(x1_mean, r'$\theta_1$')
_plot_mean(x2_mean, r'$\theta_2$')
_plot_mean(x3_mean, r'$\theta_3$')
# The cluster used for each line is hard-coded (third for -2, first for 0);
# presumably the one whose mean is closest in the second coordinate.
_mark_partial_observation(-2, x3_mean[0])
_mark_partial_observation(0, x1_mean[0])
_finish_axes()
plt.show()

# --- Figure 5: the unlabelled mixture ---------------------------------------
plt.figure(figsize=(6, 6))
_plot_data(x)
_finish_axes()
plt.show()

# --- Figure 6: hand-written k-means, one subplot per iteration --------------
np.random.seed(2)
k = 3
n = x.shape[1]
# This random labelling is overwritten before it is read. The call is kept
# because it consumes draws from the global RNG; dropping it would change the
# permutation below and therefore the initial centroids.
c = np.random.randint(k, size=(n,))
# Initialization: k distinct data points (fancy indexing copies, so updating
# theta in place does not touch x).
theta = x[:, np.random.permutation(n)[:k]]

plt.figure(figsize=(18, 12))
maxiter = 6
for iteration in range(maxiter):
    # Assignment step.
    c = _assign_clusters(x, theta)

    # Show the assignment together with the centroids that produced it.
    # The 2x3 grid is sized for maxiter == 6.
    plt.subplot(2, 3, iteration + 1)
    _plot_clusters(x, c, theta)
    _finish_axes(legend=False, title=f'iteration number: {iteration}')

    # Update step.
    _update_centroids(x, c, theta)

plt.show()

# --- Figure 7: scikit-learn k-means on the same data ------------------------
# scikit-learn expects samples in rows, hence the transposes.
model = skc.KMeans(n_clusters=k, max_iter=maxiter).fit(x.T)
theta = model.cluster_centers_.T
c = model.labels_

plt.figure(figsize=(6, 6))
_plot_clusters(x, c, theta)
_finish_axes(legend=False, title=r'$k$-means via scikit-learn package')
plt.show()