Code Club Week 7¶

Regular Expressions¶

In [ ]:

import re
?re

In [ ]:

# Anchor at the start of the string and capture the word that follows "The".
txt = 'The cat sat on the mat 11 times.'
pattern = re.compile(r'^The (\w*)')
pattern.findall(txt)

In [ ]:

# Capture every three-character run ending in "at" that is preceded by whitespace.
txt = 'The cat sat on the mat 11 times.'
pattern = re.compile(r'\s(.at)')
pattern.findall(txt)

In [ ]:

# Match the whole sentence with three positional capture groups,
# then show all captured groups as a tuple.
pattern2 = re.compile(r'^The (\w*) sat (.*) (\d*) times\.$')
grouper = pattern2.match(txt)
grouper.groups()

In [ ]:

grouper.group(2)

In [ ]:

pattern2.match(txt).group(1)

In [ ]:

# Same sentence pattern, but the first two groups are named ("what", "where")
# so they can be looked up by name as well as by position.
whatwhere = re.compile(r'^The (?P<what>\w*) sat (?P<where>.*) (\d*) times\.$')
whatwhere.match(txt).group('where')

In [ ]:

whatwhere.match(txt).group(0)

In [ ]:

whatwhere.match(txt).group(3)

In [ ]:

whatwhere.match(txt).groups()

In [ ]:

re.sub(r'The (\w*) (.*) ([^\s]*) ([\d]{1,}.*)\.',r'The \3 \2 \1? Was it really \4?',txt)

Time¶

In [ ]:

from datetime import datetime

#Time and datetimes can be represented as special datetime objects
now=datetime.now()
now

In [ ]:

#We can read individual components via the attributes .microsecond, .second, .minute, .hour, .day, .month, .year
now.microsecond

In [ ]:

#We can format a date in a variety of ways..

#Reference: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

now.strftime("%d/%m/%Y")

In [ ]:

now_sentence=now.strftime("%A, %B %d, %Y (week %W)")
now_sentence

In [ ]:

#Shortcuts
#The "%F" directive (shorthand for "%Y-%m-%d") comes from the platform C library and is
#not guaranteed to be available everywhere, so use the portable ISO date shortcut,
#which produces the same YYYY-MM-DD text.
now_str=now.date().isoformat()
now_str

In [ ]:

#Parse the date back
datetime.strptime(now_str,"%Y-%m-%d")

In [ ]:

datetime.strptime(now_sentence,"%A, %B %d, %Y (week %W)")

In [ ]:

from dateutil.parser import parse
?parse

parse('11/2/13',dayfirst=True)

In [ ]:

parse('11-10-09',dayfirst=None,yearfirst=None)

In [ ]:

parse('January 22nd, 1983')

In [ ]:

parse('January 22nd, 1983, 10:23am')

Code Club Eats¶

In [ ]:

import pandas as pd

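To grab a sheet of a Google Spreadsheet as a CSV file, use the URL pattern `https://docs.google.com/spreadsheets/d/KEY/export?gid=SHEETNUMBER&format=csv` (SHEETNUMBER starts at 0), as in the next cell: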

In [ ]:

url='https://docs.google.com/a/okfn.org/spreadsheets/d/1M14S4hqG4F5P8H78VdOMMeoITOPBpVZEGoiCvXEFBQg/export?gid=0&format=csv'
cceats=pd.read_csv(url)

In [ ]:

cceats

Food Standards Agency - www.food.gov.uk¶

In [ ]:

import requests
import json

In [ ]:

#http://api.ratings.food.gov.uk/help
#http://docs.python-requests.org/en/latest/user/quickstart/

# Look up one establishment by name and postcode and decode the JSON reply.
params={'name':"McDonald's",'address':'SW12 9AU'}
r=requests.get('http://api.ratings.food.gov.uk/Establishments',
               # recent versions of requests only accept str/bytes header values,
               # so the API version is sent as the string "2" rather than the int 2
               headers={"x-api-version":"2"},
               params=params,
               # without a timeout a stalled server would hang the cell indefinitely
               timeout=30)
j=r.json()
j

In [ ]:

def getFoodRatingData(name,address,timeout=30):
    """Query the food ratings API's Establishments endpoint.

    name: sent as the 'name' query parameter.
    address: sent as the 'address' query parameter.
    timeout: seconds to wait for the server before requests gives up.

    Returns the requests Response object unchanged; the body is neither
    parsed nor status-checked here.
    """
    params={'name':name,'address':address}
    r=requests.get('http://api.ratings.food.gov.uk/Establishments',
                   # recent versions of requests only accept str/bytes header values,
                   # so the API version is sent as the string "2" rather than the int 2
                   headers={"x-api-version":"2"},
                   params=params,
                   timeout=timeout)
    return r

def parseFoodRatingData(jdata):
    """Flatten decoded establishment data into a DataFrame, one row per establishment.

    jdata: decoded JSON mapping with an 'establishments' list. Each entry must
        provide 'BusinessName', 'FHRSID', 'PostCode', 'RatingValue' and
        'RatingDate'; the key/value pairs of its 'geocode' and 'scores'
        mappings are merged into the same row.

    Returns a pandas DataFrame; it is empty when 'establishments' is empty.
    Raises KeyError if 'establishments' or one of the five required fields
    is missing.
    """
    rows=[]
    for establishment in jdata['establishments']:
        info={item: establishment[item]
              for item in ['BusinessName','FHRSID','PostCode','RatingValue','RatingDate']}
        # A missing or null nested mapping contributes no columns instead of raising.
        info.update(establishment.get('geocode') or {})
        info.update(establishment.get('scores') or {})
        rows.append(info)
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; building the frame once from the collected rows avoids both.
    return pd.DataFrame(rows)

def getAndParseFoodRatingData(name,address):
    """Fetch ratings for a name/address pair and return them as a DataFrame.

    Chains getFoodRatingData, JSON-decodes the response body, and hands the
    decoded data to parseFoodRatingData.
    """
    response=getFoodRatingData(name,address)
    return parseFoodRatingData(json.loads(response.content))

In [ ]:

parseFoodRatingData(j)

In [ ]:

getAndParseFoodRatingData('Sacro Cuore','NW10 3NB')

In [ ]:

# Look up every row of the spreadsheet and stack the per-restaurant results.
frames=[]
for _,place in cceats.iterrows():
    found=getAndParseFoodRatingData(place['Name'],place['Postcode'])
    if not found.empty:
        frames.append(found)
# DataFrame.append was removed in pandas 2.0 and re-copied adf on every pass;
# a single pd.concat does the same stacking. pd.concat raises on an empty
# list, hence the fallback to an empty frame.
adf=pd.concat(frames) if frames else pd.DataFrame()
adf[['FHRSID','BusinessName','PostCode','RatingDate','RatingValue','latitude','longitude',
               'Structural','Hygiene','ConfidenceInManagement']]

Mapping¶

In [ ]:

#!pip install folium
import folium

In [ ]:

from IPython.display import HTML
import folium

def inline_map(map):
    """
    Embeds the HTML source of the map directly into the IPython notebook.

    Calls map._build_map() and wraps the resulting map.HTML in an iframe via
    its srcdoc attribute, returning an IPython HTML display object.

    This method will not work if the map depends on any files (json data). Also this uses
    the HTML5 srcdoc attribute, which may not be supported in all browsers.
    """
    map._build_map()
    # Inside a double-quoted attribute both '&' and '"' must be escaped, and '&'
    # must go first so the '&quot;' entities added afterwards are not re-escaped.
    # Escaping only '"' let entities already present in the map HTML be decoded
    # one level too early by the browser.
    srcdoc=map.HTML.replace('&', '&amp;').replace('"', '&quot;')
    return HTML('<iframe srcdoc="{srcdoc}" style="width: 100%; height: 510px; border: none"></iframe>'.format(srcdoc=srcdoc))

def embed_map(map, path="map.html"):
    """
    Writes the map to a file and embeds an iframe linking to it in the IPython notebook.

    map.create_map(path=path) is called first; the returned HTML object points
    the iframe at "files/<path>".

    Note: the map source itself is not captured in the notebook, only the link.
    This method should work for all maps (as long as they use relative urls).
    """
    map.create_map(path=path)
    iframe_markup = '<iframe src="files/{path}" style="width: 100%; height: 510px; border: none"></iframe>'.format(path=path)
    return HTML(iframe_markup)

In [ ]:

ccd=pd.read_csv('/Users/ajh59/Downloads/Code-Club-s-Fave-Restaurants-Sheet1-csv.csv')
ccd

In [ ]:

# Start a map centred on latitude 51.5, longitude 0 and drop one plain marker
# per row of ccd, using its latitude/longitude columns.
fmap = folium.Map(location=[51.5, 0], zoom_start=9)

for _, place in ccd.iterrows():
    fmap.simple_marker([place['latitude'], place['longitude']])
inline_map(fmap)

In [ ]:

# Same map, but built from the ratings frame: markers are clustered and each
# popup shows the business name and its rating value.
fmap = folium.Map(location=[51.5, 0], zoom_start=9)

for _, place in adf.iterrows():
    position = [place['latitude'], place['longitude']]
    popup_html = 'Name: {name}<br/>Score: {score}'.format(name=place['BusinessName'],
                                                           score=place['RatingValue'])
    fmap.simple_marker(position, clustered_marker=True, popup=popup_html)
inline_map(fmap)

In [ ]: