#!/usr/bin/env python
# coding: utf-8
#
# Toronto Neighborhood Map
# Step 1
# ***Textbook Created***
# Step 2
# In[109]:
import numpy as np
import pandas as pd
import requests
import folium
# Install third-party packages by shelling out through IPython.
# get_ipython() is only defined inside an IPython/Jupyter session, so these
# lines raise NameError when the file is run with a plain Python interpreter.
# lxml and beautifulsoup4 are HTML parsing backends that pandas.read_html can use.
get_ipython().system('pip install lxml')
get_ipython().system('pip install beautifulsoup4')
# NOTE(review): geocoder is installed here but never imported in the visible
# part of this script (geopy is used for geocoding instead) - confirm it is needed.
get_ipython().system('pip install geocoder')
# In[94]:
# Parse every HTML table found on the Wikipedia page and keep the first one.
postal_code_tables = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
)
df = postal_code_tables[0]
# Notebook preview of the first 15 rows (a bare expression has no effect in a script).
df.head(15)
# In[95]:
# Notebook inspection of the column dtypes.
df.dtypes
# Step 3
# In[99]:
# Clean the scraped postal-code table in three steps.
#
# 1. Only process the cells that have an assigned borough: rows whose borough
#    is "Not assigned" are discarded. Filtering into a fresh copy gives an
#    independent frame that can be modified safely in the steps below.
df = df[df['Borough'] != "Not assigned"].copy()
# 2. If a cell has a borough but a "Not assigned" neighborhood, the
#    neighborhood becomes the borough. The values are written through df.loc
#    with a boolean mask so the assignment lands in df itself; an in-place
#    replace() on the df['Neighborhood'] selection is not a reliable way to
#    write back into the frame. This runs before the join below so that the
#    placeholder never ends up embedded in a comma-separated list.
no_neighborhood = df['Neighborhood'] == "Not assigned"
df.loc[no_neighborhood, 'Neighborhood'] = df.loc[no_neighborhood, 'Borough']
# 3. More than one neighborhood can exist in one postal code area (for example
#    M5A may be listed twice: Harbourfront and Regent Park). Those rows are
#    combined into one row with the neighborhoods separated by a comma.
#    transform() only rewrites the column and keeps one row per input row, so
#    the repeated postal codes are dropped afterwards to really end up with a
#    single row per postal code.
df['Neighborhood'] = df.groupby("Postal Code")["Neighborhood"].transform(', '.join)
df = df.drop_duplicates(subset="Postal Code")
# In[102]:
# Notebook inspection of the cleaned frame's (rows, columns).
df.shape
# In[103]:
# Notebook preview of the first rows.
df.head()
# Stage 2
# In[104]:
# df now holds the postal code, borough and neighborhood of each area. To be
# able to use the Foursquare location data, each postal code also needs
# latitude and longitude coordinates, which are loaded from a CSV file.
geospatial_csv_url = "https://cocl.us/Geospatial_data"
df2 = pd.read_csv(geospatial_csv_url)
# Notebook preview of the coordinate table.
df2.head()
# In[105]:
# Notebook inspection of the coordinate table's dtypes.
df2.dtypes
# In[108]:
# Inner join on the shared 'Postal Code' column: only postal codes present in
# both frames are kept.
df3 = df.merge(df2, how='inner', on='Postal Code')
df3.head(15)
# Stage 3
# In[111]:
# Stage 3 setup: imports, display options and run-time package installs.
import numpy as np
import pandas as pd
# Do not truncate columns or rows when a dataframe is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
# get_ipython() only exists inside an IPython/Jupyter session.
get_ipython().system('conda install -c conda-forge geopy --yes')
from geopy.geocoders import Nominatim
import requests
# json_normalize is exposed at the pandas top level in current releases and is
# no longer importable from pandas.io.json there; fall back to the old
# location so that older pandas installations keep working.
try:
    from pandas import json_normalize
except ImportError:
    from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
get_ipython().system('conda install -c conda-forge folium=0.5.0 --yes')
import folium
print('Libraries imported.')
# In[121]:
# Report the number of distinct borough values and the number of rows in df3
# (the row count is what the message calls "postal codes").
borough_count = len(df3['Borough'].unique())
postal_code_count = df3.shape[0]
summary_template = 'The dataframe has {} boroughs and {} postal codes.'
print(summary_template.format(borough_count, postal_code_count))
# In[122]:
# Look up the coordinates of Toronto with the Nominatim geocoder; latitude and
# longitude are used afterwards as the centre of the map.
address = 'Toronto, CA'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with an
# explicit message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Geocoding returned no result for address {!r}.'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))
# In[124]:
# Create a folium map centred on the geocoded coordinates and add one circle
# marker per row of df3, with a popup reading "<neighborhood>, <borough>".
Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, borough, neighborhood in zip(df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighborhood']):
    popup_text = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(popup_text, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html is an option of folium.Popup; confirm that
        # passing it to CircleMarker as well is intended.
        parse_html=False).add_to(Toronto_map)
# Bare expression: displays the map in a notebook, has no effect in a script.
Toronto_map
# In[ ]: