Exercise 2.01

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load circles.csv into a DataFrame and scatter-plot its first column
# (x-axis) against its second column (y-axis).
data = pd.read_csv("circles.csv")
fig, ax = plt.subplots()
ax.scatter(data.iloc[:, 0], data.iloc[:, 1])
plt.show()

In [3]:
# Histogram of the first column of `data`, using matplotlib's default bins.
fig, ax = plt.subplots()
ax.hist(data.iloc[:, 0])
plt.show()


Exercise 2.02

In [4]:
from sklearn.cluster import KMeans

In [5]:
# Fit k-means once for every cluster count from 1 to 20 and record the fitted
# model's `inertia_` for each, so inertia can later be plotted against k.
ideal_k = []
for i in range(1, 21):
    est_kmeans = KMeans(n_clusters=i, random_state=0)
    est_kmeans.fit(data)

    # One [k, inertia] pair per candidate cluster count.
    ideal_k.append([i, est_kmeans.inertia_])

In [6]:
# Turn the list of [k, inertia] pairs into a 2-D array so that each column
# can be sliced out directly.
ideal_k = np.asarray(ideal_k)

In [7]:
# Plot inertia (column 1 of `ideal_k`) against the number of clusters
# (column 0). Axis labels are set so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(ideal_k[:, 0], ideal_k[:, 1])
ax.set_xlabel("Number of clusters (k)")
ax.set_ylabel("Inertia")
plt.show()


The number of clusters selected was 5.

In [8]:
# Fit the 5-cluster k-means model (fit() returns the estimator itself), then
# assign every row of `data` to one of its clusters.
est_kmeans = KMeans(n_clusters=5, random_state=0).fit(data)
pred_kmeans = est_kmeans.predict(data)

In [9]:
# Scatter the first two columns of `data`, colouring each point by the
# cluster label k-means predicted for it.
fig, ax = plt.subplots()
ax.scatter(data.iloc[:, 0], data.iloc[:, 1], c=pred_kmeans)
plt.show()


Exercise 2.03

In [10]:
from sklearn.cluster import MeanShift

In [11]:
# Fit mean-shift clustering with a bandwidth of 0.5 and label every row of
# `data`. The bandwidth is passed by keyword because MeanShift's constructor
# parameters are keyword-only in current scikit-learn releases, where the
# positional form `MeanShift(0.5)` raises a TypeError.
est_meanshift = MeanShift(bandwidth=0.5)
est_meanshift.fit(data)
pred_meanshift = est_meanshift.predict(data)

In [12]:
# Scatter the first two columns of `data`, colouring each point by the
# cluster label mean-shift predicted for it.
fig, ax = plt.subplots()
ax.scatter(data.iloc[:, 0], data.iloc[:, 1], c=pred_meanshift)
plt.show()

In [ ]: