In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

from scipy.io import loadmat
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from scipy import linalg

K-means on example data

In [2]:
data = loadmat('data/data.mat')
data.keys()
Out[2]:
dict_keys(['__header__', '__version__', '__globals__', 'X'])
In [3]:
X = data['X']
print('X:', X.shape)
X: (300, 2)
In [4]:
km1 = KMeans(3)
km1.fit(X)
Out[4]:
KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=3, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)
In [5]:
plt.scatter(X[:,0], X[:,1], s=40, c=km1.labels_, cmap=plt.cm.prism) 
plt.title('K-Means Clustering Results with K=3')
plt.scatter(km1.cluster_centers_[:,0], km1.cluster_centers_[:,1], marker='+', s=100, c='k', linewidth=2);

Image compression with K-means

In [6]:
img = plt.imread('data/bird_small.png')
img_shape = img.shape
img_shape
Out[6]:
(128, 128, 3)
In [8]:
A = img/255.0
In [9]:
AA = A.reshape(128*128,3)
AA.shape
Out[9]:
(16384, 3)
In [10]:
km2 = KMeans(16)
km2.fit(AA)
Out[10]:
KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=16, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)
In [11]:
B = km2.cluster_centers_[km2.labels_].reshape(img_shape[0], img_shape[1], 3)
In [12]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(13,9))
ax1.imshow(img)
ax1.set_title('Original')
ax2.imshow(B*255)
ax2.set_title('Compressed, with 16 colors')

for ax in fig.axes:
    ax.axis('off')