from google.colab import drive
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

drive.mount('/content/gdrive')

df = pd.read_csv("/content/gdrive/My Drive/datasets/nyc_italian.csv")

# Save a copy of the original df before applying any transformations; we'll need it later for plotting.
original_df = df.copy()

# Inspect a few rows
print("Sample:")
display(df.sample(5))

X = df[["Price", "Food", "Decor", "Service", "East", "latitude", "longitude"]]

# Fit the scaler and transform the features
scaler = StandardScaler()
X = pd.DataFrame(
    scaler.fit_transform(X),
    columns=X.columns,
    index=X.index
)

# Determine how many principal components to keep:
# fit a full PCA, then count the components that explain more than 10% of the variance.
pca = PCA(n_components=None)
pca_temp = pca.fit(X)
n_components = sum(pca_temp.explained_variance_ratio_ > 0.1)
print(f"Number of components with explained variance ratio > 0.1: {n_components}")

# Refit PCA with the chosen number of components
pca = PCA(n_components=n_components)
X = pd.DataFrame(
    pca.fit_transform(X),
    index=X.index
)

# Cluster the PCA-transformed features with k-means
n_clusters = 4
kmeans = KMeans(n_clusters=n_clusters, random_state=42).fit(X)
train_clusters = kmeans.labels_

# Attach the cluster labels to the untransformed data and summarize each cluster
original_df["kmeans"] = kmeans.labels_
clusters_df = original_df.drop(["Case", "Restaurant"], axis=1).groupby("kmeans").mean()
print("Clusters:")
display(clusters_df)

import seaborn as sns
import matplotlib.pyplot as plt
import folium

fmap = folium.Map(location=[40.7128, -74.0060], zoom_start=12)

colors = ['beige', 'lightblue', 'gray', 'blue', 'darkred', 'lightgreen',
          'purple', 'red', 'green', 'lightred', 'white', 'darkblue',
          'darkpurple', 'cadetblue', 'orange', 'pink', 'lightgray', 'darkgreen']

# Plot each restaurant on the folium map by its latitude and longitude,
# colored by its cluster assignment.
for index, row in original_df.iterrows():
    color = colors[kmeans.labels_[index]]
    description = (
        f"{row['Restaurant']} price={row['Price']} food={row['Food']} "
        f"decor={row['Decor']} service={row['Service']}"
    )
    folium.Marker(
        [row["latitude"], row["longitude"]],
        popup=description,
        icon=folium.Icon(color=color),
    ).add_to(fmap)

# Display the map
display(fmap)
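
# Optional: n_clusters = 4 above is hard-coded. A minimal sketch of one common
# alternative, assuming we want to choose k by silhouette score on the same
# PCA-transformed X (the candidate range 2..8 is an arbitrary choice for illustration):
from sklearn.metrics import silhouette_score

scores = {}
for k in range(2, 9):
    labels = KMeans(n_clusters=k, random_state=42).fit_predict(X)
    scores[k] = silhouette_score(X, labels)  # higher means better-separated clusters

print("Silhouette scores by k:", scores)
best_k = max(scores, key=scores.get)
print(f"Best k by silhouette score: {best_k}")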