import pandas as pd
# Read the cleaned Jaipur data set from the working directory and preview
# its first five rows.
dataframe = pd.read_csv(filepath_or_buffer="JaipurFinalCleanData.csv")
print(dataframe.head(n=5))
date mean_temperature max_temperature min_temperature \ 0 2016-05-04 34 41 27 1 2016-05-05 31 38 24 2 2016-05-06 28 34 21 3 2016-05-07 30 38 23 4 2016-05-08 34 41 26 Mean_dew_pt mean_pressure max_humidity min_humidity max_dew_pt_1 \ 0 6 1006.00 27 5 12 1 7 1005.65 29 6 13 2 11 1007.94 61 13 16 3 13 1008.39 69 18 17 4 10 1007.62 50 8 14 max_dew_pt_2 min_dew_pt_1 min_dew_pt_2 max_pressure_1 max_pressure_2 \ 0 10 -2 -2 1009 1008 1 12 0 -2 1008 1009 2 13 6 0 1011 1008 3 16 9 6 1011 1011 4 17 6 9 1010 1011 min_pressure_1 min_pressure_2 rainfall 0 1000 1001 0.0 1 1001 1000 0.0 2 1003 1001 5.0 3 1004 1003 0.0 4 1002 1004 0.0
# Show the dtype pandas inferred for every column; the output shows `date` was
# read as a plain object column rather than a datetime type.
dataframe.dtypes
date object mean_temperature int64 max_temperature int64 min_temperature int64 Mean_dew_pt int64 mean_pressure float64 max_humidity int64 min_humidity int64 max_dew_pt_1 int64 max_dew_pt_2 int64 min_dew_pt_1 int64 min_dew_pt_2 int64 max_pressure_1 int64 max_pressure_2 int64 min_pressure_1 int64 min_pressure_2 int64 rainfall float64 dtype: object
# Discard the max_dew_pt_2 column, then display the leading rows to confirm
# the column is no longer present.
dataframe = dataframe.drop(columns=["max_dew_pt_2"])
dataframe.head()
| | date | mean_temperature | max_temperature | min_temperature | Mean_dew_pt | mean_pressure | max_humidity | min_humidity | max_dew_pt_1 | min_dew_pt_1 | min_dew_pt_2 | max_pressure_1 | max_pressure_2 | min_pressure_1 | min_pressure_2 | rainfall |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-05-04 | 34 | 41 | 27 | 6 | 1006.00 | 27 | 5 | 12 | -2 | -2 | 1009 | 1008 | 1000 | 1001 | 0.0 |
1 | 2016-05-05 | 31 | 38 | 24 | 7 | 1005.65 | 29 | 6 | 13 | 0 | -2 | 1008 | 1009 | 1001 | 1000 | 0.0 |
2 | 2016-05-06 | 28 | 34 | 21 | 11 | 1007.94 | 61 | 13 | 16 | 6 | 0 | 1011 | 1008 | 1003 | 1001 | 5.0 |
3 | 2016-05-07 | 30 | 38 | 23 | 13 | 1008.39 | 69 | 18 | 17 | 9 | 6 | 1011 | 1011 | 1004 | 1003 | 0.0 |
4 | 2016-05-08 | 34 | 41 | 26 | 10 | 1007.62 | 50 | 8 | 14 | 6 | 9 | 1010 | 1011 | 1002 | 1004 | 0.0 |
from sklearn.cluster import KMeans
import numpy as np
# Feature matrix for clustering: every column except the leading date column,
# laid out as scikit-learn expects -- one row per record, one column per
# feature, i.e. shape (n_samples, n_features).
# NOTE: do not wrap this in np.column_stack. Given a single 2-D array it
# iterates over the rows and stacks each one as a column, which transposes the
# data; KMeans would then cluster the feature columns instead of the records.
cols = dataframe.iloc[:, 1:].values
km_res = KMeans(n_clusters=3).fit(cols)
# Centroids, shape (n_clusters, n_features): one coordinate per feature column.
km_res.cluster_centers_
array([[ 27. , 29. , 61. , ..., 42. , 37. , 38. ], [1004.8 , 1004.73 , 1006.188 , ..., 1014.082 , 1013.632 , 1013.152 ], [ 13.44444444, 13. , 14.88888889, ..., 8.88888889, 10. , 10.55555556]])
# fit_predict refits the KMeans model on the feature columns (date excluded)
# and returns one cluster label per row of the dataframe.
km_res.fit_predict(dataframe.iloc[:,1:])
array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 
0, 0, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2])
# Shape of the label array returned by fit_predict (one label per row).
# Note that this call fits the model again just to obtain the shape.
km_res.fit_predict(dataframe.iloc[:,1:]).shape
(676,)
# Shape of the feature matrix (all columns except date), for comparison with
# the number of labels produced above.
dataframe.iloc[:,1:].shape
(676, 15)