import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the dataset; later steps in this script read it through `data`.
data = pd.read_csv("circles.csv")

# Pull out the first two columns once so both plots share them.
first_col, second_col = data.iloc[:, 0], data.iloc[:, 1]

# Scatter plot of the first column against the second column.
plt.scatter(first_col, second_col)
plt.show()

# Histogram of the first column's values.
plt.hist(first_col)
plt.show()
from sklearn.cluster import KMeans

# Fit KMeans for every cluster count from 1 to 20 and record the inertia of
# each fitted model, so the counts can be compared visually.
ideal_k = []
for i in range(1, 21):
    # random_state is fixed so repeated runs give the same fits.
    est_kmeans = KMeans(n_clusters=i, random_state=0)
    est_kmeans.fit(data)
    ideal_k.append([i, est_kmeans.inertia_])

# Convert to a 2-column array: column 0 is the cluster count, column 1 the
# inertia recorded for that count.
ideal_k = np.array(ideal_k)

# Plot inertia against the number of clusters (e.g. to look for an "elbow").
plt.plot(ideal_k[:, 0], ideal_k[:, 1])
plt.show()
# A cluster count of 5 was selected.
# Fit a 5-cluster KMeans model on the dataset (fit() returns the estimator
# itself, so construction and fitting are chained here).
est_kmeans = KMeans(n_clusters=5, random_state=0).fit(data)

# Assign every row of the dataset to one of the fitted clusters.
pred_kmeans = est_kmeans.predict(data)

# Scatter the first two columns, colouring each point by its cluster label.
plt.scatter(
    data.iloc[:, 0],
    data.iloc[:, 1],
    c=pred_kmeans,
)
plt.show()
from sklearn.cluster import MeanShift

# Cluster the dataset with mean shift using a bandwidth of 0.5.
# The bandwidth must be passed by keyword: MeanShift's constructor parameters
# are keyword-only in current scikit-learn, so MeanShift(0.5) raises TypeError.
est_meanshift = MeanShift(bandwidth=0.5)
est_meanshift.fit(data)

# Assign every row of the dataset to one of the clusters found.
pred_meanshift = est_meanshift.predict(data)

# Scatter the first two columns, colouring each point by its cluster label.
plt.scatter(data.iloc[:, 0], data.iloc[:, 1], c=pred_meanshift)
plt.show()