import numpy as np
import pandas as pd
import requests
import folium
!pip install lxml
!pip install beautifulsoup4
!pip install geocoder
Requirement already satisfied: lxml in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (4.5.1) Requirement already satisfied: beautifulsoup4 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (4.9.1) Requirement already satisfied: soupsieve>1.2 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from beautifulsoup4) (2.0.1) Requirement already satisfied: geocoder in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (1.38.1) Requirement already satisfied: click in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from geocoder) (7.1.2) Requirement already satisfied: requests in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from geocoder) (2.23.0) Requirement already satisfied: ratelim in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from geocoder) (0.1.6) Requirement already satisfied: six in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from geocoder) (1.14.0) Requirement already satisfied: future in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from geocoder) (0.18.2) Requirement already satisfied: chardet<4,>=3.0.2 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests->geocoder) (3.0.4) Requirement already satisfied: certifi>=2017.4.17 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests->geocoder) (2020.4.5.1) Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests->geocoder) (1.25.9) Requirement already satisfied: idna<3,>=2.5 in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from requests->geocoder) (2.9) Requirement already satisfied: decorator in /home/jupyterlab/conda/envs/python/lib/python3.6/site-packages (from ratelim->geocoder) (4.4.2)
# Scrape the Wikipedia page listing Canadian postal codes that start with "M".
# read_html returns one DataFrame per HTML table it finds; the first is kept.
df = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
)[0]
# Preview the first 15 rows of the scraped table.
df.head(n=15)
Postal Code | Borough | Neighborhood | |
---|---|---|---|
0 | M1A | Not assigned | Not assigned |
1 | M2A | Not assigned | Not assigned |
2 | M3A | North York | Parkwoods |
3 | M4A | North York | Victoria Village |
4 | M5A | Downtown Toronto | Regent Park, Harbourfront |
5 | M6A | North York | Lawrence Manor, Lawrence Heights |
6 | M7A | Downtown Toronto | Queen's Park, Ontario Provincial Government |
7 | M8A | Not assigned | Not assigned |
8 | M9A | Etobicoke | Islington Avenue, Humber Valley Village |
9 | M1B | Scarborough | Malvern, Rouge |
10 | M2B | Not assigned | Not assigned |
11 | M3B | North York | Don Mills |
12 | M4B | East York | Parkview Hill, Woodbine Gardens |
13 | M5B | Downtown Toronto | Garden District, Ryerson |
14 | M6B | North York | Glencairn |
# Show the dtype pandas inferred for each column of the scraped table.
df.dtypes
Postal Code object Borough object Neighborhood object dtype: object
# Keep only rows with an assigned borough: every row whose Borough is the
# literal string "Not assigned" is removed from df in place.
df.drop(index=df.index[df['Borough'] == 'Not assigned'], inplace=True)
# A postal code can be listed on several rows, one per neighborhood (for
# example M5A with Harbourfront and Regent Park). Those rows are combined into
# a single row whose Neighborhood is the comma-separated list of all of them.
#
# Step 1: give every row of a postal code the joined neighborhood string.
df['Neighborhood'] = df.groupby('Postal Code')['Neighborhood'].transform(', '.join)
# Step 2: transform() keeps one output row per input row, so the repeated
# postal codes still have to be collapsed. Keeping the first occurrence
# preserves the original row order and index labels.
df.drop_duplicates(subset='Postal Code', inplace=True)
# If a row has a borough but its neighborhood is "Not assigned", use the
# borough name as the neighborhood.
# A boolean mask with .loc copies the Borough value row by row. The previous
# Series.replace(..., df["Borough"], inplace=True) passed a whole Series as the
# replacement value and mutated a column selection in place, which does not
# reliably write back to df.
unassigned_neighborhood = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned_neighborhood, 'Neighborhood'] = df.loc[unassigned_neighborhood, 'Borough']
# Show the (rows, columns) size of df after the cleaning steps above.
df.shape
(103, 3)
# Preview the first rows of the cleaned table.
df.head()
Postal Code | Borough | Neighborhood | |
---|---|---|---|
2 | M3A | North York | Parkwoods |
3 | M4A | North York | Victoria Village |
4 | M5A | Downtown Toronto | Regent Park, Harbourfront |
5 | M6A | North York | Lawrence Manor, Lawrence Heights |
6 | M7A | Downtown Toronto | Queen's Park, Ontario Provincial Government |
# df now holds postal code, borough and neighborhood for each area. To use
# location data (e.g. Foursquare) each postal code also needs latitude and
# longitude, which are loaded here from a CSV of geospatial data.
df2 = pd.read_csv('https://cocl.us/Geospatial_data')
# Preview the first five rows of the coordinates table.
df2.head(n=5)
Postal Code | Latitude | Longitude | |
---|---|---|---|
0 | M1B | 43.806686 | -79.194353 |
1 | M1C | 43.784535 | -79.160497 |
2 | M1E | 43.763573 | -79.188711 |
3 | M1G | 43.770992 | -79.216917 |
4 | M1H | 43.773136 | -79.239476 |
# Show the dtype pandas inferred for each column of the coordinates table.
df2.dtypes
Postal Code object Latitude float64 Longitude float64 dtype: object
# Attach coordinates to each postal code. merge defaults to an inner join, so
# only postal codes present in both tables are kept.
df3 = df.merge(df2, on='Postal Code')
# Preview the first 15 merged rows.
df3.head(n=15)
Postal Code | Borough | Neighborhood | Latitude | Longitude | |
---|---|---|---|---|---|
0 | M3A | North York | Parkwoods | 43.753259 | -79.329656 |
1 | M4A | North York | Victoria Village | 43.725882 | -79.315572 |
2 | M5A | Downtown Toronto | Regent Park, Harbourfront | 43.654260 | -79.360636 |
3 | M6A | North York | Lawrence Manor, Lawrence Heights | 43.718518 | -79.464763 |
4 | M7A | Downtown Toronto | Queen's Park, Ontario Provincial Government | 43.662301 | -79.389494 |
5 | M9A | Etobicoke | Islington Avenue, Humber Valley Village | 43.667856 | -79.532242 |
6 | M1B | Scarborough | Malvern, Rouge | 43.806686 | -79.194353 |
7 | M3B | North York | Don Mills | 43.745906 | -79.352188 |
8 | M4B | East York | Parkview Hill, Woodbine Gardens | 43.706397 | -79.309937 |
9 | M5B | Downtown Toronto | Garden District, Ryerson | 43.657162 | -79.378937 |
10 | M6B | North York | Glencairn | 43.709577 | -79.445073 |
11 | M9B | Etobicoke | West Deane Park, Princess Gardens, Martin Grov... | 43.650943 | -79.554724 |
12 | M1C | Scarborough | Rouge Hill, Port Union, Highland Creek | 43.784535 | -79.160497 |
13 | M3C | North York | Don Mills | 43.725900 | -79.340923 |
14 | M4C | East York | Woodbine Heights | 43.695344 | -79.318389 |
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import json
!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
import requests
from pandas.io.json import json_normalize
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes
import folium
print('Libraries imported.')
Collecting package metadata (current_repodata.json): done Solving environment: done ## Package Plan ## environment location: /home/jupyterlab/conda/envs/python added / updated specs: - geopy The following packages will be downloaded: package | build ---------------------------|----------------- ca-certificates-2020.4.5.2 | hecda079_0 147 KB conda-forge certifi-2020.4.5.2 | py36h9f0ad1d_0 152 KB conda-forge geographiclib-1.50 | py_0 34 KB conda-forge geopy-1.22.0 | pyh9f0ad1d_0 63 KB conda-forge ------------------------------------------------------------ Total: 395 KB The following NEW packages will be INSTALLED: geographiclib conda-forge/noarch::geographiclib-1.50-py_0 geopy conda-forge/noarch::geopy-1.22.0-pyh9f0ad1d_0 The following packages will be UPDATED: ca-certificates 2020.4.5.1-hecc5488_0 --> 2020.4.5.2-hecda079_0 certifi 2020.4.5.1-py36h9f0ad1d_0 --> 2020.4.5.2-py36h9f0ad1d_0 Downloading and Extracting Packages geopy-1.22.0 | 63 KB | ##################################### | 100% certifi-2020.4.5.2 | 152 KB | ##################################### | 100% ca-certificates-2020 | 147 KB | ##################################### | 100% geographiclib-1.50 | 34 KB | ##################################### | 100% Preparing transaction: done Verifying transaction: done Executing transaction: done Collecting package metadata (current_repodata.json): done Solving environment: failed with initial frozen solve. Retrying with flexible solve. Collecting package metadata (repodata.json): done Solving environment: | ^C failed with initial frozen solve. Retrying with flexible solve. CondaError: KeyboardInterrupt Libraries imported.
# Report the number of distinct boroughs and the number of rows (one per
# postal code) in the merged table.
print(
    f"The dataframe has {len(df3['Borough'].unique())} boroughs "
    f"and {len(df3)} postal codes."
)
The dataframe has 10 boroughs and 103 postal codes.
# Look up Toronto's coordinates with Nominatim; they are used as the centre of
# the map built below.
address = 'Toronto, CA'
# NOTE(review): the user agent says "ny" although the address is Toronto;
# consider renaming it to something that identifies this notebook.
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match. Fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude, longitude = location.latitude, location.longitude
# NOTE(review): "geograpical" is a typo in the message; the string is left
# unchanged so the printed output stays the same.
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))
The geograpical coordinate of Toronto are 43.6534817, -79.3839347.
# Base map centred on the geocoded Toronto coordinates.
Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of df3, with a popup reading
# "<neighborhood>, <borough>".
for lat, lng, borough, neighborhood in zip(
        df3['Latitude'], df3['Longitude'], df3['Borough'], df3['Neighborhood']):
    # parse_html is a Popup option, so it is passed here only. The
    # CircleMarker call below no longer passes it because it is not a
    # marker/path option.
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(Toronto_map)

# Last expression of the cell, so the notebook renders the map.
Toronto_map