import pandas as pd
#Use the path to your own file
#If the file is in the current working directory, you just need the filename
#Remember: run !pwd on a Mac (or !cd on Windows) to find the current working directory
#Note that depending on your machine settings, when you look at the file in a file browser
## the file suffix (.csv) may not be displayed - but it is still there and does need adding to the filename
cceats=pd.read_csv('/Users/ajh59/Downloads/Code-Club-s-Fave-Restaurants-Sheet1-csv.csv')
cceats
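#If pd.read_csv complains it can't find the file, you can check where the notebook is looking from
## within Python itself - a minimal sketch using the standard os module:
import os
os.getcwd()      #show the current working directory
#os.listdir('.') #uncomment to list the files in it, so you can check your csv file is actually there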
#http://stackoverflow.com/questions/3828723/why-we-need-sys-setdefaultencodingutf-8-in-a-py-script
#If you get the ascii/utf-8 error (this only applies to Python 2), uncomment and run the following
#import sys
#reload(sys)
#sys.setdefaultencoding("utf-8")
#NOTE - you should only need to do this once
#This should work - uncomment (remove the #) and run the following command-line command:
#!pip install folium
#NOTE - you only need to install the package once.
#What the command does is fetch the package files from an online directory of python packages and then
## install them into the python distribution on your own computer.
#If it doesn't work (i.e. you get error messages...) then try to run the command directly:
## - open a terminal/command prompt
## - go to the Anaconda directory
## - go into the bin directory (which is where the pip command lives...) and run:
#pip install folium
#Having installed the package, we can load it in to the notebook
import folium
#If the package has not been installed, you will see something like:
#ImportError: No module named folium
#If you ever see that sort of error, you need to install the missing package...
#So if you try to run:
#import foo
#and see: ImportError: No module named foo
#what do you need to do?
#Try: pip install foo
#Programmers share code - and reuse each other's code - all the time...
#(that's partly what libraries and packages are about).
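#One common pattern is to try the import and fall back to an installation hint if it fails -
## a minimal sketch (folium is just the package we happen to need here):
try:
    import folium
except ImportError:
    print("folium is not installed - try running: pip install folium")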
#Copy and paste the folium_base.py code here and run the cell
from IPython.display import HTML
import folium
def inline_map(map):
    """
    Embeds the HTML source of the map directly into the IPython notebook.
    This method will not work if the map depends on any files (json data). Also this uses
    the HTML5 srcdoc attribute, which may not be supported in all browsers.
    """
    map._build_map()
    return HTML('<iframe srcdoc="{srcdoc}" style="width: 100%; height: 510px; border: none"></iframe>'.format(srcdoc=map.HTML.replace('"', '&quot;')))

def embed_map(map, path="map.html"):
    """
    Embeds a linked iframe to the map into the IPython notebook.
    Note: this method will not capture the source of the map into the notebook.
    This method should work for all maps (as long as they use relative urls).
    """
    map.create_map(path=path)
    return HTML('<iframe src="files/{path}" style="width: 100%; height: 510px; border: none"></iframe>'.format(path=path))
cceats
#Create a map, centred roughly on London
fmap=folium.Map(location=[51.5, 0])
def plotmarker(row):
fmap.simple_marker( [row['latitude'], row['longitude']] )
# "for" loops are common to a wide variety of proramming languages, allowing you to do something a particular number
# of times or for each item in a list or set of things.
#The iterrows() method enables you to iterate through each row in the dataframe.
#This allows you to do something to each row in turn
#iterrows() actually returns a couple of items at each pass - the row index value, and the row values by column name
#We want to access the second of those items, the row values by column name, so count to the second item: 0,1,..
#Once we have that second item, we need to say which column value we want from the row
for row in cceats.iterrows():
#The 'latitude' and 'longitude' names correspond to column names in the original cceats dataframe
latlon = [ row[1]['latitude'], row[1]['longitude'] ]
fmap.simple_marker( latlon )
embed_map(fmap)
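#An equivalent, slightly tidier way to write the loop is to unpack the two items iterrows() returns
## (the index and the row) directly in the for statement - a sketch of the same map built that way:
fmap=folium.Map(location=[51.5, 0])
for ix, row in cceats.iterrows():
    fmap.simple_marker( [row['latitude'], row['longitude']] )
embed_map(fmap)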
#Centre the map on the average location of our points
lat=cceats['latitude'].mean()
lon=cceats['longitude'].mean()
fmap=folium.Map(location=[lat, lon], zoom_start=9)
def plotmarker(row):
    fmap.simple_marker( [row['latitude'], row['longitude']] )
#Rather than looping explicitly, apply() calls plotmarker once for each row (that's what axis=1 means)
cceats.apply( plotmarker, axis=1)
inline_map(fmap)
fmap=folium.Map(location=[lat, lon], zoom_start=9)
for row in cceats.iterrows():
latlon = [ row[1]['latitude'], row[1]['longitude'] ]
fmap.simple_marker( latlon, popup='This is my label' )
inline_map(fmap)
fmap=folium.Map(location=[lat, lon], zoom_start=9)
for row in cceats.iterrows():
latlon = [ row[1]['latitude'], row[1]['longitude'] ]
fmap.simple_marker( latlon, popup=row[1]['Name'] )
inline_map(fmap)
"This is my string.".format()
print("This is my {} string.".format("variable"))
print("This is my {} string{}.".format("variable",", okay?"))
print("This is my {var1} string{other}.".format(other="variable",var1=", okay?"))
#Stuck? HINT: popup='Name: {name}'.format(name=row[1]['Name'])
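#Putting the hint to work - one possible way of labelling each marker with a formatted popup
## (this is just one solution to the exercise, built from the pieces above):
fmap=folium.Map(location=[lat, lon], zoom_start=9)
for row in cceats.iterrows():
    latlon = [ row[1]['latitude'], row[1]['longitude'] ]
    fmap.simple_marker( latlon, popup='Name: {name}'.format(name=row[1]['Name']) )
inline_map(fmap)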
def patcher(fn='map.html'):
    #Read in the HTML file that folium generated
    f=open(fn,'r')
    html=f.read()
    f.close()
    #Folium refers to its javascript and css files with protocol-relative URLs (starting //);
    ## these don't resolve when the file is opened directly from disk, so force them to http://
    html=html.replace('"//','"http://')
    #Write the patched HTML back out to the same file
    f=open(fn,'w')
    f.write(html)
    f.close()
#Run the patcher - by default, the file we look for is map.html in the current working directory
patcher()
#You should now be able to double click on the file to open and view it correctly in your browser, share it by email etc
import requests
#http://api.ratings.food.gov.uk/help
#http://docs.python-requests.org/en/latest/user/quickstart/
#The FSA API requires an x-api-version header - note that header values should be passed as strings
params={'name':"McDonald's",'address':'SW12 9AU'}
r=requests.get('http://api.ratings.food.gov.uk/Establishments',
               headers={"x-api-version":"2"},
               params=params)
r.content
import json
j=json.loads(r.content)
j
j['establishments'][0]['BusinessName']
j['establishments'][0]['geocode']['latitude']
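#If you want to see what other data items are available for an establishment, you can list the keys
## of the corresponding python dict - a quick way to explore the structure of the response:
j['establishments'][0].keys()
#Nested elements, such as the geocode and scores, are themselves dicts with their own keys
#j['establishments'][0]['scores'].keys()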
#The general pattern for the CSV export URL of a Google spreadsheet is:
#https://docs.google.com/spreadsheets/d/SPREADSHEETKEY/export?gid=SHEETNUMBER&format=csv
url='https://docs.google.com/a/okfn.org/spreadsheets/d/1M14S4hqG4F5P8H78VdOMMeoITOPBpVZEGoiCvXEFBQg/export?gid=0&format=csv'
cceats_google=pd.read_csv(url)
cceats_google
#Be lazy... we can turn the original example of calling the FSA website into a function
def getFoodRatingData(name,address):
    params={'name':name,'address':address}
    r=requests.get('http://api.ratings.food.gov.uk/Establishments',
                   headers={"x-api-version":"2"},
                   params=params)
    return r
tmp=getFoodRatingData("Mcdonald's","SW12 9AU")
tmp.content
tmp=getFoodRatingData('Sacro Cuore','NW10 3NB')
tmp.content
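#It can be worth checking that the request actually worked before trying to parse the response -
## a minimal sketch using the requests response object (status code 200 means 'OK'):
if tmp.status_code == 200:
    print('Request worked')
else:
    print('Request failed with status code {code}'.format(code=tmp.status_code))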
#The pandas DataFrame .append() method can be used to add a python dict to a dataframe
stuff={'book':'War and Peace', 'opinion':'too long'}
df_tmp = pd.DataFrame()
df_tmp = df_tmp.append(stuff,ignore_index=True)
df_tmp
df_tmp=df_tmp.append({'opinion':'cracking read','book':'Flash Boys'},ignore_index=True)
df_tmp
df_tmp.append(df_tmp)
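#If your version of pandas no longer has the DataFrame .append() method (it was removed in later
## releases), pd.concat() can be used to the same effect - a sketch of the equivalent:
#df_tmp = pd.concat( [df_tmp, pd.DataFrame([stuff])], ignore_index=True )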
#Let's parse a json response from the FSA API as a function
def parseFoodRatingData(jdata):
df=pd.DataFrame()
    #The FSA API returns a list of establishments, though the list may only contain one establishment
#Generate one row per establishment we get back
for establishment in jdata['establishments']:
#Create an empty dict to hold the data we want from the FSA API
info={}
#Here are some of the data items I want
for item in ['BusinessName','FHRSID','PostCode','RatingValue','RatingDate']:
#Take those items from the data returned from the FSA and put them into my 'useful data' dict
info[item]= establishment[item]
        #We can also iterate through the items contained in nested elements of the FSA data dict
for item in establishment['geocode']:
#..that is, the latitude and longitude elements...
info[item]= establishment['geocode'][item]
for item in establishment['scores']:
#..and here we grab the individual score components
info[item]= establishment['scores'][item]
#Now use the data we grabbed as the basis for a dataframe row
df=df.append(info,ignore_index=True)
return df
#Test the parser on the json object (j) we grabbed from the FSA API earlier
parseFoodRatingData(j)
#Let's simplify further - create another function that:
#-- gets the data from the FSA website
#-- parses it
#-- returns it as a dataframe
def getAndParseFoodRatingData(name,address):
r=getFoodRatingData(name,address)
jdata=json.loads(r.content)
df=parseFoodRatingData(jdata)
return df
getAndParseFoodRatingData('Sacro Cuore','NW10 3NB')
#TRY HACKING SOMETHING TO SEE IF YOU CAN FIGURE OUT A WAY OF DOING IT..
#Here's one solution
#Create a dummy dataframe to put stuff into
cceats_fsa=pd.DataFrame()
#Iterate through each eatery in the data we grabbed from the Google spreadsheet
for place in cceats_google.iterrows():
#Using the name and postcode, grab the FSA rating for that establishment and add it to the growing cceats_fsa dataframe
cceats_fsa=cceats_fsa.append(getAndParseFoodRatingData(place[1]['Name'],place[1]['Postcode']),ignore_index=True)
cceats_fsa
#Merge the two dataframes, matching rows where the FSA 'PostCode' equals the spreadsheet 'Postcode'
cceats_bigdata=pd.merge(cceats_fsa,cceats_google,left_on='PostCode',right_on='Postcode')
cceats_bigdata
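#By default pd.merge() keeps only the rows that match in both dataframes (an 'inner' join);
## if you want to keep every eatery from the spreadsheet even where no FSA record was found,
## a 'left' merge does that - a sketch (the unmatched FSA columns will be filled with NaN):
#cceats_bigdata=pd.merge(cceats_google,cceats_fsa,left_on='Postcode',right_on='PostCode',how='left')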
#Display the dataframe using reordered columns
cceats_bigdata[['FHRSID','BusinessName','Person recommended','Type of food','PostCode','RatingDate','RatingValue','latitude','longitude',
'Structural','Hygiene','ConfidenceInManagement']]