Exercise 2.01

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [2]:
# Read the dataset from the CSV file in the working directory.
data = pd.read_csv("circles.csv")

# Scatter the first column (horizontal axis) against the second column
# (vertical axis) to get a first look at the raw points.
plt.scatter(
    x=data.iloc[:, 0],
    y=data.iloc[:, 1],
)
plt.show()
In [3]:
# Histogram of the first column of `data` (matplotlib's default binning).
plt.hist(x=data.iloc[:, 0])
plt.show()

Exercise 2.02

In [4]:
from sklearn.cluster import KMeans
In [5]:
# Sweep candidate cluster counts 1..20 and record, for each one, the
# inertia of a KMeans model fitted on `data`. Each entry is
# [n_clusters, inertia]; random_state is fixed so the sweep is repeatable.
ideal_k = []
for i in range(1, 21):
    # fit() returns the estimator itself, so construction and fitting chain.
    est_kmeans = KMeans(n_clusters=i, random_state=0).fit(data)
    ideal_k.append([i, est_kmeans.inertia_])
In [6]:
# Turn the list of [n_clusters, inertia] pairs into a 2-column array so the
# columns can be sliced directly when plotting.
ideal_k = np.asarray(ideal_k)
In [7]:
# Elbow plot: column 0 of `ideal_k` holds the candidate cluster counts and
# column 1 the KMeans inertia recorded for the corresponding fit.
plt.plot(ideal_k[:, 0], ideal_k[:, 1])
# Label the figure so it can be read on its own when the notebook is skimmed.
plt.xlabel("Number of clusters (k)")
plt.ylabel("Inertia")
plt.title("Elbow method for choosing k")
plt.show()

A value of 5 was selected as the number of clusters.

In [8]:
# Final KMeans model with 5 clusters (random_state fixed for repeatable
# results). fit() returns the fitted estimator, so it can be chained.
est_kmeans = KMeans(n_clusters=5, random_state=0).fit(data)

# Cluster index assigned to every row of `data` by the fitted model.
pred_kmeans = est_kmeans.predict(data)
In [9]:
# Same scatter of the first two columns, with each point coloured by the
# cluster label KMeans assigned to it.
plt.scatter(
    x=data.iloc[:, 0],
    y=data.iloc[:, 1],
    c=pred_kmeans,
)
plt.show()

Exercise 2.03

In [10]:
from sklearn.cluster import MeanShift
In [11]:
# Mean-shift clustering with a bandwidth of 0.5.
# The bandwidth is passed by keyword: scikit-learn 1.0+ declares
# MeanShift's constructor parameters keyword-only, so the positional form
# `MeanShift(0.5)` raises TypeError there.
est_meanshift = MeanShift(bandwidth=0.5)
est_meanshift.fit(data)

# Cluster index assigned to every row of `data` by the fitted model.
pred_meanshift = est_meanshift.predict(data)
In [12]:
# Scatter of the first two columns, with each point coloured by the
# cluster label mean-shift assigned to it.
plt.scatter(
    x=data.iloc[:, 0],
    y=data.iloc[:, 1],
    c=pred_meanshift,
)
plt.show()
In [ ]: