%reload_ext autoreload
%autoreload 2
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import sys
sys.path.append('..')
from helper import kmeans as km
ndarray
from skimage import io
# Load the image and rescale it to floats by dividing by 255 (assumes the PNG
# decodes to 8-bit integers -- TODO confirm); without the cast the colours
# look wrong after clustering.
pic = io.imread('data/bird_small.png') / 255.
# skimage.io.imshow is deprecated in recent scikit-image releases; draw the
# image with matplotlib directly instead.
plt.imshow(pic)
<matplotlib.image.AxesImage at 0x112c14550>
# Inspect the dimensions of the loaded image array before flattening it.
pic.shape
(128, 128, 3)
# Flatten the image into a 2-D table with one row per pixel and one column per
# colour channel. The sizes are derived from `pic` itself rather than
# hard-coded, so an image of any resolution works.
data = pic.reshape(-1, pic.shape[-1])
My version takes more than 10 minutes... OK, now I know why I shouldn't implement my own ML library.
In the future I will only implement ML algorithms for the sake of learning them XD
# C, centroids, cost = km.k_means(pd.DataFrame(data), 16, epoch = 10, n_init=3)
from sklearn.cluster import KMeans
# Cluster the pixel colours into a 16-colour palette, keeping the best of 100
# random initialisations.
# NOTE: the `n_jobs` argument was deprecated in scikit-learn 0.23 and removed
# in 1.0, where passing it raises TypeError, so it is no longer supplied.
model = KMeans(n_clusters=16, n_init=100)
model.fit(data)
KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=16, n_init=100, n_jobs=-1, precompute_distances='auto', random_state=None, tol=0.0001, verbose=0)
# Pull out the fitted cluster centres (the palette) and the cluster index
# assigned to every row of `data`, then report both shapes, one per line.
centroids = model.cluster_centers_
C = model.predict(data)
print(centroids.shape, C.shape, sep='\n')
(16, 3) (16384,)
# Index the centroid table with the per-row cluster labels: every row of
# `data` is replaced by its cluster's centre. Check the resulting shape.
centroids[C].shape
(16384, 3)
# Rebuild the image from the palette: look up each pixel's centroid colour and
# restore the original layout. Using `pic.shape` instead of a hard-coded
# (128, 128, 3) keeps this correct for any input image.
compressed_pic = centroids[C].reshape(pic.shape)

# Show the original (left) next to the 16-colour reconstruction (right).
fig, ax = plt.subplots(1, 2)
ax[0].imshow(pic)
ax[1].imshow(compressed_pic)
<matplotlib.image.AxesImage at 0x106b640b8>