import folium
import matplotlib.colors as colors
import matplotlib.cm as cm
from folium import plugins
from scipy.spatial.distance import cdist, pdist
from sklearn import metrics
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Load the occurrence table that the clustering and the maps are built from.
df_loc = pd.read_csv("map/data/north-york_kmeans.csv")

# NOTE(review): rows are NOT weighted by 'severity' before clustering.
# An earlier row-duplication loop built on DataFrame.append never modified
# df_loc (append returned a new frame that was discarded) and the method no
# longer exists in pandas >= 2.0, so it was dropped. If severity weighting
# is wanted, pass sample_weight=df_loc['severity'] to KMeans.fit instead of
# duplicating rows -- confirm the intent before changing the results.

# Notebook-style inspection expressions; they have no effect when this file
# is run as a plain script.
df_loc.head()
df_loc.loc[df_loc['origin coordinate latitude'] == 43.701330]

# Feature matrix: one row per record, columns ordered as
# (origin lat, origin lng, destination lat, destination lng).
# NOTE(review): 'lontitude' [sic] presumably matches the CSV header --
# verify against the data file before correcting the spelling.
X = df_loc[[
    'origin coordinate latitude',
    'origin coordinate longitude',
    'destination coordinate latitude',
    'destination coordinate lontitude',
]].values

# Fit one K-Means model for every candidate cluster count in Ks.
Ks = range(1, 20)
kmean = [KMeans(n_clusters=i).fit(X) for i in Ks]
def plot_elbow(kmean, X):
    """Plot the percentage of variance explained by each fitted model.

    For every model the within-cluster sum of squares (WCSS) of ``X`` is
    computed from the distance of each sample to its nearest centroid. The
    curve shows ``(TSS - WCSS) / TSS * 100`` against the number of
    centroids of each model, where TSS is the total sum of squares of ``X``.

    Args:
        kmean: Sequence of fitted estimators exposing ``cluster_centers_``.
        X: 2-D array-like of samples, one row per sample, with the same
            columns the estimators were fitted on.
    """
    X = np.asarray(X, dtype=float)

    # Take the x-axis from the models themselves so the plot does not depend
    # on a module-level list of cluster counts staying in sync with `kmean`.
    cluster_counts = [len(model.cluster_centers_) for model in kmean]

    # WCSS: squared distance from every sample to its closest centroid.
    wcss = np.array([
        np.sum(np.min(cdist(X, model.cluster_centers_, 'euclidean'), axis=1) ** 2)
        for model in kmean
    ])

    # Total sum of squares about the overall mean. This equals
    # sum(pdist(X) ** 2) / n_samples without building the O(n^2) pairwise
    # distance vector.
    tss = np.sum((X - X.mean(axis=0)) ** 2)
    bss = tss - wcss

    _, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))
    ax.plot(cluster_counts, bss / tss * 100, 'b*-')
    ax.set_xlabel('Number of clusters')
    ax.set_ylabel('Percentage of variance explained')
    ax.grid(True)
    plt.show()
# Draw the elbow curve for the fitted models.
plot_elbow(kmean, X)

# Base map that plot_stations_map() adds the per-cluster markers to.
map_clusters = folium.Map(location=[43.761539, -79.411079], zoom_start=4)

# Build a palette of hex colour strings sampled evenly from the rainbow
# colormap. Only the length of `cys` (20) is used; its values do not
# influence the colours.
cx = np.arange(20)
cys = [step + cx + (step * cx) ** 2 for step in range(20)]
palette_positions = np.linspace(0, 1, len(cys))
colors_array = cm.rainbow(palette_positions)
colos = list(map(colors.rgb2hex, colors_array))
def plot_stations_map(df_loc):
    """Add one circle marker per row of ``df_loc`` to ``map_clusters``.

    Each marker sits at the row's origin coordinates, is coloured from the
    module-level ``colos`` palette according to the row's cluster id, and
    carries a ``'Cluster <id>'`` tooltip.

    Args:
        df_loc: DataFrame with the columns 'origin coordinate latitude',
            'origin coordinate longitude' and 'cluster'. The cluster values
            are used as integer indices into ``colos``.
    """
    rows = zip(
        df_loc['origin coordinate latitude'],
        df_loc['origin coordinate longitude'],
        df_loc['cluster'],
    )
    for lat, lng, cluster in rows:
        # Wrap around the palette so a cluster id beyond its length reuses a
        # colour instead of raising IndexError.
        colour = colos[cluster % len(colos)]
        folium.vector_layers.CircleMarker(
            [lat, lng],
            radius=5,
            tooltip='Cluster ' + str(cluster),
            color=colour,
            fill=True,
            fill_color=colour,
            fill_opacity=0.1,
        ).add_to(map_clusters)
# Cluster counts whose fitted model is labelled and drawn on the map.
k = [16]
for n_clusters in k:
    # Look the model up through Ks rather than assuming position k - 1, so a
    # count that was never fitted raises ValueError instead of silently
    # selecting a different model.
    est = kmean[Ks.index(n_clusters)]
    df_loc['cluster'] = est.predict(X).tolist()
    # NOTE(review): every selected count draws onto the same map_clusters;
    # with more than one entry in `k` the marker layers will overlap.
    plot_stations_map(df_loc)

# Notebook-style display of the map; no effect when run as a plain script.
map_clusters
# Centroids of the selected model, one row per cluster. The columns follow
# the order of the feature matrix the model was fitted on.
centers = np.array(est.cluster_centers_)
print(centers)

centroid_map = folium.Map(location=[43.761539, -79.411079], zoom_start=15)

# Draw one red segment per centroid, from its first coordinate pair to its
# second coordinate pair.
for start_lat, start_lng, end_lat, end_lng in centers:
    segment = [[start_lat, start_lng], [end_lat, end_lng]]
    line = folium.PolyLine(segment, color='red', weight=4, opacity=1)
    line.add_to(centroid_map)

# Notebook-style display of the map; no effect when run as a plain script.
centroid_map
# Map of individual records: one circle per row, with a radius proportional
# to the row's severity relative to the mean severity.
map_c = folium.Map(location=[43.761539, -79.411079], zoom_start=4)
occurences = folium.map.FeatureGroup()
n_mean = df_loc['severity'].mean()

marker_fields = zip(
    df_loc['origin coordinate latitude'],
    df_loc['origin coordinate longitude'],
    df_loc['cluster'],
    df_loc['severity'],
)
# The cluster id is read alongside the other columns but is not used for
# the marker itself.
for lat, lng, _cluster, sev in marker_fields:
    marker = folium.vector_layers.CircleMarker(
        [lat, lng],
        radius=sev / n_mean * 8,
        color='orange',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.3,
        tooltip=str(sev),
    )
    occurences.add_child(marker)

map_c.add_child(occurences)