Simple scribbles around Isle of Wight local election candidate data.
Reusing code and ideas from:
Note that this is a bit ropey and requires you to work through the steps.
You may also need to install some additional packages along the way...
#Define scraper sqlite database filename
dbname= "norfolk.sqlite"
#Optionally delete the previous instance
!rm {dbname}
#Filename for map of candidates
mapname='norfolkmap.html'
# The localarea is a search keyword we look for in the address of a companies
# that may be associated with directors with the same exact name as a candidate
# USe it to just limit the dispplay of companies to companies with addresses that contain that keyword
localarea='Norfolk'
#Candidates filename
candsfilename='norfolkcands.csv'
#Supporters filename
supportersfilename='norfolksupporters.csv'
#filename for companies
companiesfilename='norfolkcos.csv'
#Path to the Notice of election file (note, this can be the path/name of a local file)
url='https://www.south-norfolk.gov.uk/sites/default/files/Notice%20of%20Poll.pdf'
#Companies House API token
#Available from: https://developer.companieshouse.gov.uk/api/docs/index/gettingStarted/apikey_authorisation.html
CH_API_TOKEN=''
#Set the path to the pip installer for your Python kernel
#It may be available on your path directly, or you may need to specify a path, as below, or pip3
#pip='~/anaconda/bin/pip'
#pip='pip3'
pip='pip'
#Install required packages
!{pip} install lxml
!{pip} install scraperwiki
!{pip} install pandas
!{pip} install networkx
!{pip} install geocoder
!{pip} install folium
!{pip} install uk-postcode-utils
Requirement already satisfied: lxml in /usr/local/lib/python3.5/site-packages Requirement already satisfied: scraperwiki in /usr/local/lib/python3.5/site-packages Requirement already satisfied: alembic in /usr/local/lib/python3.5/site-packages (from scraperwiki) Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.5/site-packages (from scraperwiki) Requirement already satisfied: six in /usr/local/lib/python3.5/site-packages (from scraperwiki) Requirement already satisfied: requests in /usr/local/lib/python3.5/site-packages (from scraperwiki) Requirement already satisfied: Mako in /usr/local/lib/python3.5/site-packages (from alembic->scraperwiki) Requirement already satisfied: python-editor>=0.3 in /usr/local/lib/python3.5/site-packages (from alembic->scraperwiki) Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.5/site-packages (from Mako->alembic->scraperwiki) Requirement already satisfied: pandas in /usr/local/lib/python3.5/site-packages Requirement already satisfied: pytz>=2011k in /usr/local/lib/python3.5/site-packages (from pandas) Requirement already satisfied: numpy>=1.7.0 in /usr/local/lib/python3.5/site-packages (from pandas) Requirement already satisfied: python-dateutil>=2 in /usr/local/lib/python3.5/site-packages (from pandas) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.5/site-packages (from python-dateutil>=2->pandas) Requirement already satisfied: networkx in /usr/local/lib/python3.5/site-packages Requirement already satisfied: decorator>=3.4.0 in /usr/local/lib/python3.5/site-packages (from networkx) Requirement already satisfied: geocoder in /usr/local/lib/python3.5/site-packages Requirement already satisfied: ratelim in /usr/local/lib/python3.5/site-packages (from geocoder) Requirement already satisfied: six in /usr/local/lib/python3.5/site-packages (from geocoder) Requirement already satisfied: requests in /usr/local/lib/python3.5/site-packages (from geocoder) Requirement already satisfied: click in /usr/local/lib/python3.5/site-packages (from geocoder) Requirement already satisfied: decorator in /usr/local/lib/python3.5/site-packages (from ratelim->geocoder) Requirement already satisfied: folium in /usr/local/lib/python3.5/site-packages Requirement already satisfied: Jinja2 in /usr/local/lib/python3.5/site-packages (from folium) Requirement already satisfied: branca in /usr/local/lib/python3.5/site-packages (from folium) Requirement already satisfied: six in /usr/local/lib/python3.5/site-packages (from folium) Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.5/site-packages (from Jinja2->folium) Requirement already satisfied: uk-postcode-utils in /usr/local/lib/python3.5/site-packages
#Configure name of scraper sqlite database file
import os
os.environ["SCRAPERWIKI_DATABASE_NAME"] ='sqlite:///{}'.format(dbname)
# SCRAPER CODE REUSED FROM A LONG TIME AGO, WITH MINOR TWEAKS
#code from https://classic.scraperwiki.com/scrapers/iw_poll_notices_scrape/
#with a couple of minor tweaks - seach for strings using 'in' rather than 'startwsith'
import scraperwiki
import requests, lxml.etree
#Current local election notice of election PDF for Isle of Wight - looks to be same format-ish as before
#url='https://www.iwight.com/azservices/documents/1174-Notice-of-Poll-IOWC-2017.pdf'
#Read in the Notice of Poll PDF
pdfdata = requests.get(url).content
#Convert PDF to XML - this breaks in Python 3?
xmldata = scraperwiki.pdftoxml(pdfdata)
root = lxml.etree.fromstring(xmldata)
pages = list(root)
# this function has to work recursively because we might have "<b>Part1 <i>part 2</i></b>"
def gettext_with_bi_tags(el):
res = [ ]
if el.text:
res.append(el.text)
for lel in el:
res.append("<%s>" % lel.tag)
res.append(gettext_with_bi_tags(lel))
res.append("</%s>" % lel.tag)
if el.tail:
res.append(el.tail)
return "".join(res).strip()
#Scrape the separate pages
#print(pages)
for page in pages:
data={'stations':[]}
phase=0
for el in page:
#print(el.attrib, gettext_with_bi_tags(el))
if 'Election of' in gettext_with_bi_tags(el):
phase=1
continue
tmp=gettext_with_bi_tags(el).strip()
if phase==1:
if tmp=='':pass
else:
data['ward']=tmp
phase=phase+1
elif phase==2:
if 'Proposers' in tmp:
phase=3
record={'candidate':[],'address':[],'desc':[],'proposers':[],'seconders':[]}
data['list']=[]
continue
elif phase==3:
if tmp.strip()=='':
phase=4
#print('-------------------------------')
data['list'].append(record)
continue
elif int(el.attrib['left'])<100:
if record['address']!=[]:
data['list'].append(record)
record={'candidate':[],'address':[],'desc':[],'proposers':[],'seconders':[]}
record['candidate'].append(tmp)
elif int(el.attrib['left'])<300: record['address'].append(tmp)
elif int(el.attrib['left'])<450: record['desc'].append(tmp)
elif int(el.attrib['left'])<600:
if tmp.startswith('('): record['proposers'][-1]=record['proposers'][-1]+' '+tmp
elif len(record['proposers'])>0 and record['proposers'][-1].strip().endswith('-'): record['proposers'][-1]=record['proposers'][-1]+tmp
elif len(record['proposers'])>0 and record['proposers'][-1].strip().endswith('.'): record['proposers'][-1]=record['proposers'][-1]+' '+tmp
else: record['proposers'].append(tmp)
elif int(el.attrib['left'])<750:
if tmp.startswith('('): record['seconders'][-1]=record['seconders'][-1]+' '+tmp
elif len(record['seconders'])>0 and record['seconders'][-1].strip().endswith('-'): record['seconders'][-1]=record['seconders'][-1]+tmp
elif len(record['seconders'])>0 and record['seconders'][-1].strip().endswith('.'): record['seconders'][-1]=record['seconders'][-1]+' '+tmp
else: record['seconders'].append(tmp)
elif phase==4:
if 'persons entitled to vote' in tmp:
phase=5
record={'station':[],'range':[]}
continue
elif phase==5: #Not implemented... TO DO
#print(el.attrib, gettext_with_bi_tags(el))
if tmp.strip()=='':
data['stations'].append(record)
break #The following bits are broken...
#need to add situation
elif int(el.attrib['left'])<100:
if record['range']!=[]:
data['stations'].append(record)
record={'situation':[],'station':[],'range':[]}
record['station'].append(tmp)
elif int(el.attrib['left'])>300:
record['range'].append(tmp)
#print(data)
tmpdata=[]
for station in data['stations']:
tmpdata.append({'ward':data['ward'],
#'situation':' '.join(station['situation']),
'station':' '.join(station['station']),
'range':' '.join(station['range'])})
scraperwiki.sqlite.save(unique_keys=[], table_name='stations', data=tmpdata)
tmpdata=[]
tmpdata2=[]
#'desc': ['The Conservative Party', 'Candidate'], 'candidate': ['OULTON', 'Erica'], 'address': ['Blandings, Horringford,', 'Arreton, IW, PO30 3AP']
for candidate in data['list']:
tmpdata.append( {'ward':data['ward'],'candidate':' '.join(candidate['candidate']).encode('ascii','ignore'),
'address':' '.join(candidate['address']),'desc':' '.join(candidate['desc']) } )
party=' '.join(candidate['desc']).replace('Candidate','').strip()
cand=' '.join(candidate['candidate']).encode('ascii','ignore')
cs=cand.strip(' ').split(' ')
if len(cs)>2:
cand2=cs[:2]
for ci in cs[2:]:
cand2.append(ci[0]+'.')
else: cand2=cs
ctmp=cand2[0]
cand2.remove(ctmp)
cand2.append(ctmp.title())
candi=' '.join(cand2).encode('ascii','ignore')
for proposer in candidate['proposers']:
if proposer.find('(+)')>-1:
proposer=proposer.replace('(+)','').strip()
typ='proposer'
else:typ='assentor'
tmpdata2.append({ 'ward':data['ward'],'candidate':cand, 'candinit':candi, 'support':proposer,'role':'proposal', 'typ':typ, 'desc':party }.copy())
for seconder in candidate['seconders']:
if seconder.find('(++)')>-1:
seconder=seconder.replace('(++)','').strip().encode('ascii','ignore')
typ='seconder'
else:typ='assentor'
tmpdata2.append({ 'ward':data['ward'],'candidate':cand, 'candinit':candi, 'support':seconder,'role':'seconding', 'typ':typ, 'desc':party }.copy())
scraperwiki.sqlite.save(unique_keys=[], table_name='candidates', data=tmpdata)
scraperwiki.sqlite.save(unique_keys=[], table_name='support', data=tmpdata2)
#Check database tables
scraperwiki.sql.show_tables()
{u'candidates': u'CREATE TABLE candidates (\n\tward TEXT, \n\t"desc" TEXT, \n\tcandidate TEXT, \n\taddress TEXT\n)', u'stations': u'CREATE TABLE stations (\n\tward TEXT, \n\tstation TEXT, \n\trange TEXT\n)', u'support': u'CREATE TABLE support (\n\tcandinit TEXT, \n\trole TEXT, \n\tcandidate TEXT, \n\tsupport TEXT, \n\ttyp TEXT, \n\tward TEXT, \n\t"desc" TEXT\n)'}
%matplotlib inline
import pandas as pd
import sqlite3
#Create a connection to the database so we can query it using pandas
conn = sqlite3.connect(dbname)
/Users/ajh59/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment. warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
#TO DO... scraper not complete for extracting poll station info
stations= pd.read_sql_query("SELECT * FROM stations", conn)
stations.head(5)
ward | station | range | |
---|---|---|---|
0 | Clavering | ||
1 | Costessey | Costessey Methodist Church Hall, Norwich Road,... | 18 LU1-1 to LU1-2092 |
2 | Costessey | Costessey - Breckland Hall, Breckland Road, Ne... | 19 LV1-1 to LV1-2260 |
3 | Costessey | Costessey - Baptist Church Hall, The Street, O... | 20 NE1-1 to NE1-1927 |
4 | Costessey | Costessey Victory Academy, Luke Day Block, Vic... | 21 NF1-1 to NF1-2419 |
candidates = pd.read_sql_query("SELECT * FROM candidates", conn)
#Clean the data a bit - should maybe do this as part of the scrape, or provide a "clean col" as part of scrape
candidates['desc']=candidates['desc'].str.replace('The ','').str.replace(' Candidate','')
candidates.head(5)
ward | desc | candidate | address | |
---|---|---|---|---|
0 | Clavering | Liberal Democrat | BROWN Christopher John | Globe House, Norwich Road, Denton, Harleston, ... |
1 | Clavering | Labour Party | FOWLER Nicola Jeannette | 21 Springfields, Poringland, Norwich, NR14 7RG |
2 | Clavering | Conservative Party | STONE Margaret Florence | 25 Field Lane, Hempnall, Norwich, Norfolk, NR1... |
3 | Costessey | Liberal Democrat | EAST Tim | 7 St Walstans Close, Costessey, Norwich, NR5 0TW |
4 | Costessey | Labour Party | GARRARD Jonathan Peter | 68 Dereham Road, New Costessey, Norwich, NR5 0SY |
candidates['candidate'].unique()
array([u'BROWN Christopher John', u'FOWLER Nicola Jeannette', u'STONE Margaret Florence', u'EAST Tim', u'GARRARD Jonathan Peter', u'ROWETT Catherine Joanna', u'WILTSHIRE Andrew Roy', u'DAVISON Chris', u'KIDDIE Keith Walter', u'MILTON David', u'SCOGGINS Tracy Barbara', u'EDDY James William', u'KUZMIC Susan Evelyn', u'WILBY Martin James', u'FOULGER Colin Wayne', u'MCCLENNING Robert Arthur', u'SEWELL Steven Leigh', u'FOWLER Tom', u'HAMMOND Matthew', u'THOMSON Vic', u'BLATHWAYT Paul Wynter', u'DEWSBURY Margaret', u'LEMAN James Edward George', u'BILLS David', u'GULLIVER Bethan Sin', u'SUTTON Jacky', u'BINGHAM David Kenneth', u'BISSONNET David George', u'STONE Barry Michael', u'KATZ Elana', u'PERCIVAL Roger Neil', u'THOMAS Alison Mary', u'REEKIE Pam', u'SPRATT Beverley Herbert Allison', u'SPRATT Ian Victor', u'HALLS Julian Lawrence', u'MOONEY Joe', u'UNDERWOOD Doug'], dtype=object)
#Save supportes data file
candidates.to_csv(candsfilename,index=False)
import geocoder
def gc(address):
g=geocoder.google(address)
try:
return '{},{}'.format(g.latlng[0],g.latlng[1])
except:
pass
try:
pc=address.split(',')[-1].strip()
if pc.startswith('PO'):
g=geocoder.google(pc)
return '{},{}'.format(g.latlng[0],g.latlng[1])
else: return ''
except:
return ''
candidates['latlong']=candidates['address'].apply(gc)
candidates.head()
ward | desc | candidate | address | latlong | |
---|---|---|---|---|---|
0 | Clavering | Liberal Democrat | BROWN Christopher John | Globe House, Norwich Road, Denton, Harleston, ... | 52.448507,1.35477 |
1 | Clavering | Labour Party | FOWLER Nicola Jeannette | 21 Springfields, Poringland, Norwich, NR14 7RG | 52.5693366,1.3469526 |
2 | Clavering | Conservative Party | STONE Margaret Florence | 25 Field Lane, Hempnall, Norwich, Norfolk, NR1... | 52.4988471,1.298969 |
3 | Costessey | Liberal Democrat | EAST Tim | 7 St Walstans Close, Costessey, Norwich, NR5 0TW | 52.6460063,1.2041219 |
4 | Costessey | Labour Party | GARRARD Jonathan Peter | 68 Dereham Road, New Costessey, Norwich, NR5 0SY | 52.642241,1.231012 |
import folium
def add_marker(row,fmap):
if row['latlong']!='':
lat=row['latlong'].split(',')[0]
long=row['latlong'].split(',')[1]
folium.Marker([lat, long], popup='{}, ({})\n{}'.format(row['candidate'],row['desc'],row['ward'])).add_to(fmap)
#Create a map centered on the postcode location at a particular zoom level
#Really crude centrepoint for map
centrelatlong=candidates[candidates['latlong']!=''].iloc[0]['latlong'].split(',')
localmap = folium.Map(location=centrelatlong, zoom_start=11)
candidates.apply(lambda x: add_marker(x, localmap), axis=1)
localmap
#save the map
localmap.save(mapname)
This could be a bit ropey - really need to check we are using the correct administrative geographies.
Use a service like MapIt or postcodes.io to find ward from postcode, then compare this to the name of the ward they are standing in.
import requests
from ukpostcodeutils import validation
def getpc(addr):
''' Crude attempt to find postcode in address; conventionally last part of address in this dataset? '''
pc=addr.split(',')[-1].strip()
if validation.is_valid_postcode(pc.replace(' ','')): return pc
return ''
def getpcward(pc):
''' Lookup a ward from a postcode using postcodes.io '''
if pc!='':
try:
return requests.get('https://api.postcodes.io/postcodes/{}'.format(pc.replace(' ',''))).json()['result']['admin_ward']
except:
return ''
return ''
#Extract the postcode - conventionally, it looks like postcode is last part of address so guess at that
candidates['postcode']=candidates['address'].apply(getpc)
#Get the list of unique postcodes for candidates and lookup the corresponding ward
pcwards={pc: getpcward(pc) for pc in candidates['postcode'].unique()}
candidates['pcward']=candidates['postcode'].map(pcwards)
candidates.head()
#If this doesn't catch anything - could also try to use latlong where no postcode available...
ward | desc | candidate | address | latlong | postcode | pcward | |
---|---|---|---|---|---|---|---|
0 | Clavering | Liberal Democrat | BROWN Christopher John | Globe House, Norwich Road, Denton, Harleston, ... | 52.448507,1.35477 | IP20 0BD | Earsham |
1 | Clavering | Labour Party | FOWLER Nicola Jeannette | 21 Springfields, Poringland, Norwich, NR14 7RG | 52.5693366,1.3469526 | NR14 7RG | Poringland with the Framinghams |
2 | Clavering | Conservative Party | STONE Margaret Florence | 25 Field Lane, Hempnall, Norwich, Norfolk, NR1... | 52.4988471,1.298969 | NR15 2QZ | Hempnall |
3 | Costessey | Liberal Democrat | EAST Tim | 7 St Walstans Close, Costessey, Norwich, NR5 0TW | 52.6460063,1.2041219 | NR5 0TW | Old Costessey |
4 | Costessey | Labour Party | GARRARD Jonathan Peter | 68 Dereham Road, New Costessey, Norwich, NR5 0SY | 52.642241,1.231012 | NR5 0SY | New Costessey |
#display in ward - so Ward they're standing in is same as ward of their address
candidates[candidates['ward']==candidates['pcward']].head()
ward | desc | candidate | address | latlong | postcode | pcward |
---|
#display out ward - so Ward they're standing in is not the same as ward of their address
candidates[candidates['ward']!=candidates['pcward']]#.head()
ward | desc | candidate | address | latlong | postcode | pcward | |
---|---|---|---|---|---|---|---|
0 | Clavering | Liberal Democrat | BROWN Christopher John | Globe House, Norwich Road, Denton, Harleston, ... | 52.448507,1.35477 | IP20 0BD | Earsham |
1 | Clavering | Labour Party | FOWLER Nicola Jeannette | 21 Springfields, Poringland, Norwich, NR14 7RG | 52.5693366,1.3469526 | NR14 7RG | Poringland with the Framinghams |
2 | Clavering | Conservative Party | STONE Margaret Florence | 25 Field Lane, Hempnall, Norwich, Norfolk, NR1... | 52.4988471,1.298969 | NR15 2QZ | Hempnall |
3 | Costessey | Liberal Democrat | EAST Tim | 7 St Walstans Close, Costessey, Norwich, NR5 0TW | 52.6460063,1.2041219 | NR5 0TW | Old Costessey |
4 | Costessey | Labour Party | GARRARD Jonathan Peter | 68 Dereham Road, New Costessey, Norwich, NR5 0SY | 52.642241,1.231012 | NR5 0SY | New Costessey |
5 | Costessey | Green Party | ROWETT Catherine Joanna | 10 Caroline Court, Norwich, NR4 7EJ | 52.6214876,1.2628925 | NR4 7EJ | Eaton |
6 | Costessey | Conservative Party | WILTSHIRE Andrew Roy | 13 Cardinal Close, Easton, Norwich, NR9 5EW | 52.654118,1.1620887 | NR9 5EW | Easton |
7 | Diss and Roydon | Labour Party | DAVISON Chris | 1 Willbye Avenue, Diss, Norfolk, IP22 4NN | 52.3788722,1.1162182 | IP22 4NN | Diss |
8 | Diss and Roydon | Conservative Party | KIDDIE Keith Walter | 17 Walcot Road, Diss, Norfolk, IP22 4DB | 52.3812672,1.1136278 | IP22 4DB | Diss |
9 | Diss and Roydon | Green Party | MILTON David | 18 Friars Quay, Norwich, Norfolk, NR3 1ES | 52.6326424,1.2961341 | NR3 1ES | Mancroft |
10 | Diss and Roydon | Liberal Democrat | SCOGGINS Tracy Barbara | 22 Spencer Crescent, Diss, Norfolk, IP22 4UF | IP22 4UF | Diss | |
11 | East Depwade | Labour Party | EDDY James William | 11 Henry Ward Road, Harleston, Norfolk, IP20 9EZ | IP20 9EZ | Harleston | |
12 | East Depwade | Liberal Democrat | KUZMIC Susan Evelyn | 29 Gawdy Close, Harleston, IP20 9ET | 52.4077753,1.300708 | IP20 9ET | Harleston |
13 | East Depwade | Conservative Party | WILBY Martin James | New Lodge Farm, Common Road, Dickleburgh, Diss... | IP21 4PH | Dickleburgh | |
14 | Forehoe | Conservative Party | FOULGER Colin Wayne | Pear Tree House, The Turnpike, Bunwell, Norwic... | NR16 1SP | Bunwell | |
15 | Forehoe | Liberal Democrat | MCCLENNING Robert Arthur | Brunel, Cheneys Lane, Tacolneston, Norwich, NR... | NR16 1DB | Forncett | |
16 | Forehoe | Labour Party | SEWELL Steven Leigh | Medway, The Rosery, Mulbarton, Norwich, NR14 8AL | NR14 8AL | Mulbarton | |
17 | Henstead | Labour Party | FOWLER Tom | 21 Springfields, Poringland, Norwich, NR14 7RG | 52.5693366,1.3469526 | NR14 7RG | Poringland with the Framinghams |
18 | Henstead | Liberal Democrat | HAMMOND Matthew | 6 Church Farm Barns, The Street, Bramerton, NR... | NR14 7DW | Rockland | |
19 | Henstead | Conservative Party | THOMSON Vic | Yelverton Hall, Yelverton, Norwich, Norfolk, N... | 52.5733553,1.3568878 | NR14 7PD | Rockland |
20 | Hingham | Liberal Democrat | BLATHWAYT Paul Wynter | Rivendell, 21 Marlingford Lane, Easton, Norwic... | NR9 5AD | Easton | |
21 | Hingham | Conservative Party | DEWSBURY Margaret | 6 Park Avenue, Barford, Norwich, Norfolk, NR9 4BA | 52.6250533,1.1238474 | NR9 4BA | Easton |
22 | Hingham | Labour Party | LEMAN James Edward George | 48 Silfield Road, Wymondham, NR18 9AY | 52.5597768,1.1202557 | NR18 9AY | Cromwells |
23 | Humbleyard | Conservative Party | BILLS David | 3 Beech Court, Norwich Road, Hethersett, Norwi... | 52.597855,1.1817466 | NR9 3FE | Hethersett |
24 | Humbleyard | Labour Party | GULLIVER Bethan Sin | Laurel House, Norwich Road, Tacolneston, Norwi... | 52.5074631,1.1567953 | NR16 1BY | Forncett |
25 | Humbleyard | Liberal Democrat | SUTTON Jacky | 12 Childs Road, Hethersett, NR9 3HN | 52.6002644,1.1661295 | NR9 3HN | Hethersett |
26 | Loddon | Liberal Democrat | BINGHAM David Kenneth | 19 Gale Close, Hales, Norwich, NR14 6SN | 52.5204002,1.5095713 | NR14 6SN | Gillingham |
27 | Loddon | Labour Party | BISSONNET David George | Duck Cottage, 3 Ferry Road, Carleton St Peter,... | 52.5764436,1.463634 | NR14 7AY | Chedgrave and Thurton |
28 | Loddon | Conservative Party | STONE Barry Michael | 25 Field Lane, Hempnall, Norwich, Norfolk, NR1... | 52.4988471,1.298969 | NR15 2QZ | Hempnall |
29 | Long Stratton | Labour Party | KATZ Elana | The Farmhouse, Wolsey Farm, Durbidges Hill, Di... | IP22 5SY | Bressingham and Burston | |
30 | Long Stratton | Liberal Democrat | PERCIVAL Roger Neil | The Barn, Rattees Corner, Hapton Road, Fundenh... | NR16 1EQ | Forncett | |
31 | Long Stratton | Conservative Party | THOMAS Alison Mary | Briardale, Ipswich Road, Long Stratton, Norwic... | NR15 2TF | Stratton | |
32 | West Depwade | Labour Party | REEKIE Pam | The White House, Ipswich Road, Dickleburgh, IP... | IP21 4NJ | Dickleburgh | |
33 | West Depwade | Conservative Party | SPRATT Beverley Herbert Allison | Lakes Farm, Hall Road, Tacolneston, Norwich, N... | NR16 1DN | Forncett | |
34 | West Depwade | Liberal Democrat | SPRATT Ian Victor | 29 Knyvett Green, Ashwellthorpe, Norwich, Norf... | 52.5334951,1.159976 | NR16 1HA | Forncett |
35 | Wymondham | Liberal Democrat | HALLS Julian Lawrence | 2 Chapel Loke, Spooner Row, Wymondham, NR18 9LS | 52.5364803,1.0912552 | NR18 9LS | Cromwells |
36 | Wymondham | Conservative Party | MOONEY Joe | 2 Orchard Way, Wymondham, Norfolk, NR18 0NX | 52.5723846,1.11843 | NR18 0NX | Town |
37 | Wymondham | Labour Party | UNDERWOOD Doug | 14 Herb Robert Glade, Wymondham, Norfolk, NR18... | 52.5708446,1.1297286 | NR18 0XS | Town |
From the table of supporters, we can try to identify candidates who support other candidates.
#Preview the supporters table
supporters = pd.read_sql_query("SELECT * FROM support", conn)
#Clean the data a bit
supporters['desc']=supporters['desc'].str.replace('The ','').str.replace(' Candidate','')
supporters.head(5)
supporters.head(5)
candinit | role | candidate | support | typ | ward | desc | |
---|---|---|---|---|---|---|---|
0 | Christopher J. Brown | proposal | BROWN Christopher John | Murray Gray | proposer | Clavering | Liberal Democrat |
1 | Christopher J. Brown | proposal | BROWN Christopher John | Richard A P Carden | assentor | Clavering | Liberal Democrat |
2 | Christopher J. Brown | proposal | BROWN Christopher John | Noelle R M Barber | assentor | Clavering | Liberal Democrat |
3 | Christopher J. Brown | proposal | BROWN Christopher John | Reginald A Kirkpatrick | assentor | Clavering | Liberal Democrat |
4 | Christopher J. Brown | proposal | BROWN Christopher John | Paul E J Chaston | assentor | Clavering | Liberal Democrat |
#Save supportes data file
supporters.to_csv(supportersfilename,index=False)
#Find the unique parties
supporters['desc'].unique()
array([u'Liberal Democrat', u'Labour Party', u'Conservative Party', u'Green Party'], dtype=object)
colourmap={'Liberal Democrat':'yellow', 'Independent':'black', 'Labour Party':"red",
'Conservative Party':'blue', 'Green Party':'green', 'UKIP':'purple',
'Labour and Co- operative Party':'red'}
#Create a graph
import networkx as nx
#G=nx.from_pandas_dataframe(supporters, 'support', 'candinit')
def build_graph(row,DG):
DG.add_node(row['support'],color=colourmap[row['desc']])
DG.add_node(row['candinit'],color=colourmap[row['desc']])
DG.add_edge(row['support'],row['candinit'],color=colourmap[row['desc']])
return
DG=nx.DiGraph()
supporters.apply(lambda x: build_graph(x,DG), axis=1);
nodes = DG.nodes()
edges = DG.edges()
#filter on people who are supported and who support
supports_deg = DG.out_degree(nodes)
supported_deg = DG.in_degree(nodes)
supports = [n for n in supports_deg if supports_deg[n]]
supported = [n for n in supported_deg if supported_deg[n]]
GG=nx.DiGraph()
#Merge the egographs of people of people who support and are supported
for s2 in list(set(supports).intersection(set(supported))):
GG=nx.compose(GG,nx.ego_graph(DG,s2,5))
#http://bl.ocks.org/brinrosenthal/raw/cfb0e12f113d55551a45d530527baedf/
import visJS2jupyter.visJS_module
nodes = GG.nodes()
edges = GG.edges()
pos = nx.spring_layout(GG)
nodes_dict = [{"id":n,"color":GG.node[n]['color'],
"x":pos[n][0],
"y":pos[n][1]} for n in nodes]
node_map = dict(zip(nodes,range(len(nodes)))) # map to indices for source/target in edges
edges_dict = [{"source":node_map[edges[i][0]], "target":node_map[edges[i][1]], "color":GG[edges[i][0]][edges[i][1]]['color'],
"title":'test'} for i in range(len(edges))]
visJS2jupyter.visJS_module.visjs_network(nodes_dict,edges_dict)
#Support between connected candidates where a candidate supports another candidate
for e in edges:
print(e)
(u'Elana Katz', u'Pam Reekie')
Does it look like the same person is supporting more than one candidate?
supporters[supporters['support'].isin(supporters[supporters.duplicated(subset='support')]['support'].unique())]
candinit | role | candidate | support | typ | ward | desc |
---|
#Another way of doing that
pd.read_sql_query("SELECT * FROM support WHERE support=(SELECT support FROM support GROUP BY support HAVING COUNT(*)>1)", conn)
candinit | role | candidate | support | typ | ward | desc |
---|
Does it look like multiple candidates share the same address?
## Multiple candidates from one address
pd.read_sql_query("SELECT * FROM candidates WHERE address=(SELECT address FROM candidates GROUP BY address HAVING COUNT(*)>1)", conn)
ward | desc | candidate | address | |
---|---|---|---|---|
0 | Clavering | Labour Party | FOWLER Nicola Jeannette | 21 Springfields, Poringland, Norwich, NR14 7RG |
1 | Henstead | Labour Party | FOWLER Tom | 21 Springfields, Poringland, Norwich, NR14 7RG |
Does it look like the same party is supporting more than one candidate in a particular ward?
## Multiple Candidates for a Party in Same Ward
pd.read_sql_query("SELECT ward,desc, count(*) FROM support WHERE typ='proposer' GROUP BY ward,desc HAVING COUNT(*)>1", conn)
ward | desc | count(*) |
---|
Check to see whether the names of candidates are also possible company directors.
Could also do a check to see if they are charity trustees, bankrupt, disqualified director, registered licensee on any IW Council registers etc etc.
NOTE THAT THE FOLLOWING DOES NOT GUARANTEE OR NECESSARILY IMPLY THAT THE PERSON NAMED AS STANDING IS THE SAME PERSON AS A SIMILARLY NAMED COMPANY OFFICER.
import urllib2, base64, json
from urllib import urlencode
from time import sleep
def url_nice_req(url,t=300):
try:
return urllib2.urlopen(url)
except urllib2.HTTPError, e:
if e.code == 429:
print("Overloaded API, resting for a bit...")
sleep(t)
return url_req(url)
#Inspired by http://stackoverflow.com/a/2955687/454773
def ch_request(CH_API_TOKEN,url,args=None):
if args is not None:
url='{}?{}'.format(url,urlencode(args))
request = urllib2.Request(url)
# You need the replace to handle encodestring adding a trailing newline
# (https://docs.python.org/2/library/base64.html#base64.encodestring)
base64string = base64.encodestring('%s:' % (CH_API_TOKEN)).replace('\n', '')
request.add_header("Authorization", "Basic %s" % base64string)
result = url_nice_req(request)
#This is too hacky - need to see why it fails if it does
if result is None:
print('Oops: {}, {}'.format(url,result))
return None
j=json.loads(result.read())
return j
def ch_getAppointments(slug,location=None,typ='all',role='all',n=500,start_index=''):
if len(slug.split('/'))==1:
slug='/officers/{}/appointments'.format(slug)
url= 'https://api.companieshouse.gov.uk{}'.format(slug)
properties={'items_per_page':n,'start_index':start_index}
a=ch_request(CH_API_TOKEN,url,properties)
if a is None: return None
if location is not None:
a['items']=[i for i in a['items'] if location.lower() in i['address']['locality'].lower()]
if typ=='current':
a['items']=[i for i in a['items'] if 'resigned_on' not in i]
a['items']=[i for i in a['items'] if 'company_status' in i['appointed_to'] and i['appointed_to']['company_status'] == 'active']
#should possibly check here that len(co['items'])==co['active_count'] ?
elif typ=='previous':
a['items']=[i for i in a['items'] if 'resigned_on' in i]
elif typ=='dissolved':
a['items']=[i for i in a['items'] if 'company_status' in i['appointed_to'] and i['appointed_to']['company_status'] == 'dissolved']
if role!='all':
a['items']=[i for i in a['items'] if role==i['officer_role']]
return a
def ch_searchOfficers(name,n=50,start_index='',company='',companies=False,exact=None):
url= 'https://api.companieshouse.gov.uk/search/officers'
properties={'q':name,'items_per_page':n,'start_index':start_index}
o=ch_request(CH_API_TOKEN,url,properties)
if o is None: return o
if exact=='forename':
#This isn't right eg double barrelled surnames
s=name.lower().split(' ')
o['items'] = [i for i in o['items'] if i['title'].lower().split(' ')[0]==s[0] and i['title'].lower().split(' ')[-1]==s[-1]]
elif exact=='fullname':
o['items'] = [i for i in o['items'] if i['title'].lower()==name.lower()]
if company != '':
for p in o['items']:
p['items'] = [i for i in ch_getAppointments(p['links']['self'])['items'] if company.lower() in i['appointed_to']['company_name'].lower()]
o['items'] = [i for i in o['items'] if len(i['items'])]
if companies:
for p in o['items']:
p['items'] = [i for i in ch_getAppointments(p['links']['self'])['items']]
o['items'] = [i for i in o['items'] if len(i['items'])]
return o
appointments=pd.DataFrame()
for c in candidates['candidate'].tolist():
name=c.split()
cand=' '.join(name[1:]+name[0:1])
results=ch_searchOfficers(cand,n=50,exact='fullname')
for result in results['items']:
appointments=pd.concat([appointments,pd.DataFrame.from_dict([result])])
appointments[appointments['address_snippet'].str.contains(localarea)]
address | address_snippet | appointment_count | date_of_birth | description | description_identifiers | kind | links | matches | snippet | title | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | {u'country': u'PE38 0JN', u'region': u'Norfolk... | Holly House, Ely Road Hilgay, Downham Market, ... | 4 | {u'year': 1964, u'month': 4} | Total number of appointments 4 - Born April 1964 | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/P3V_0gkwW7uqr5JfjhoylIRG... | {u'snippet': [], u'title': [1, 11, 13, 16, 18,... | Christopher John BROWN | |
0 | {u'locality': u'Norwich', u'premises': u'21', ... | 21 Springfields, Poringland, Norwich, Norfolk,... | 2 | {u'year': 1968, u'month': 9} | Total number of appointments 2 - Born Septembe... | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/TL0ksAgMupTuLf8CHv1dwcUB... | {u'snippet': [], u'title': [1, 6, 8, 16, 18, 23]} | Nicola Jeannette FOWLER | |
0 | {u'locality': u'Harleston', u'premises': u'Off... | Office Number One First Floor Offices, Memoria... | 1 | {u'year': 1951, u'month': 12} | Total number of appointments 1 - Born December... | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/U7wK3BHRf8fK2JJUk9xkqm-G... | {u'snippet': [], u'title': [1, 6, 8, 12, 14, 18]} | Martin James WILBY | |
0 | {u'locality': u'Norwich', u'premises': u'1', u... | 1 Middleton Street, Wymondham, Norwich, Norfol... | 2 | {u'month': 7, u'year': 1944} | Total number of appointments 2 - Born July 1944 | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/vuhGJ1vGf7rPTPHrW6oG-8vp... | {u'snippet': [], u'title': [1, 5, 7, 11, 13, 19]} | Colin Wayne FOULGER | |
0 | {u'country': u'NR14 8AL', u'region': u'Norfolk... | Medway, The Rosery Mulbarton, Norwich, Norfolk... | 0 | {u'year': 1957, u'month': 5} | Total number of appointments 0 - Born May 1957 | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/-GPL4ykYfKUTSa2v9JgJZGV8... | {u'snippet': [], u'title': [1, 6, 8, 12, 14, 19]} | Steven Leigh SEWELL | |
0 | {u'premises': u'Shire Hall', u'country': u'NR1... | Shire Hall, Market Avenue, Norwich, Norfolk, N... | 3 | {u'year': 1950, u'month': 5} | Total number of appointments 3 - Born May 1950 | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/iVcH6EunMuXWPbRK1GH5CpYf... | {u'snippet': [], u'title': [1, 8, 10, 17]} | Margaret DEWSBURY | |
0 | {u'premises': u'Dragon Hall', u'country': u'NR... | Dragon Hall, 115/123 King Street, Norwich, Nor... | 0 | {u'year': 1947, u'month': 3} | Total number of appointments 0 - Born March 1947 | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/FJef2bYUqPqvbSUWJnSRDEFJ... | {u'snippet': [], u'title': [1, 5, 7, 12, 14, 22]} | David George BISSONNET | |
0 | {u'locality': u'Norwich', u'premises': u'25', ... | 25 Field Lane, Hempnall, Norwich, Norfolk, Eng... | 1 | {u'year': 1949, u'month': 6} | Total number of appointments 1 - Born June 1949 | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/y98lznNgBxCMZkhX20ajblyq... | {u'snippet': [], u'title': [1, 5, 7, 13, 15, 19]} | Barry Michael STONE | |
0 | {u'locality': u'Great Yarmouth', u'premises': ... | 35 Clover Way, Bradwell, Great Yarmouth, Norfo... | 7 | {u'year': 1949, u'month': 6} | Total number of appointments 7 - Born June 1949 | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/7OzEbU9cwaUT9OpeWYKzRswf... | {u'snippet': [], u'title': [1, 5, 7, 13, 15, 19]} | Barry Michael STONE | |
0 | {u'country': u'NR16 1EQ', u'region': u'Norfolk... | The Barn Rattees Corner, Fundenhall, Norwich, ... | 1 | {u'year': 1946, u'month': 9} | Total number of appointments 1 - Born Septembe... | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/YTp80cvjgThc5CWBd6T-n110... | {u'snippet': [], u'title': [1, 5, 7, 10, 12, 19]} | Roger Neil PERCIVAL | |
0 | {u'locality': u'Wymondham', u'premises': u'Wym... | Wymondham College, Golf Links Road, Morley, Wy... | 4 | {u'year': 1961, u'month': 12} | Total number of appointments 4 - Born December... | [appointment-count, born-on] | searchresults#officer | {u'self': u'/officers/5nNHBr0osJASt981ieIgtgVX... | {u'snippet': [], u'title': [1, 6, 8, 11, 13, 18]} | Alison Mary THOMAS |
companies=pd.DataFrame()
localAppointments=appointments[appointments['address_snippet'].str.contains(localarea)]
for appointment in localAppointments['links'].apply(pd.Series)['self'].tolist():
ddx=pd.DataFrame.from_dict(ch_getAppointments(appointment)['items'])
tmp=pd.concat([ddx.drop(['appointed_to','name_elements','links','address'], axis=1),
ddx['appointed_to'].apply(pd.Series),ddx['address'].apply(pd.Series)], axis=1)
companies=pd.concat([companies,tmp])
corecols=['name','company_status','company_number','company_name','appointed_on','resigned_on']
localcos=companies[corecols].reset_index(drop=True)
localcos=localAppointments[['title','address_snippet']].merge(localcos, left_on='title',right_on='name')[corecols+['address_snippet']]
localcos.head()
name | company_status | company_number | company_name | appointed_on | resigned_on | address_snippet | |
---|---|---|---|---|---|---|---|
0 | Christopher John BROWN | active | 05491209 | CHRIS BROWN (DOWNHAM MARKET) LIMITED | 2005-06-27 | NaN | Holly House, Ely Road Hilgay, Downham Market, ... |
1 | Christopher John BROWN | active | 03258034 | ARTEK-DESIGN-HOUSE LIMITED | 1997-06-17 | 2005-04-30 | Holly House, Ely Road Hilgay, Downham Market, ... |
2 | Christopher John BROWN | active | 02351921 | BROWN CONSULTING SERVICES LIMITED | 1997-05-06 | 1997-06-18 | Holly House, Ely Road Hilgay, Downham Market, ... |
3 | Christopher John BROWN | active | 03258034 | ARTEK-DESIGN-HOUSE LIMITED | 1996-10-02 | 1997-06-16 | Holly House, Ely Road Hilgay, Downham Market, ... |
4 | Nicola Jeannette FOWLER | active | 10032213 | NJF CONNECTIONS LIMITED | 2016-02-29 | NaN | 21 Springfields, Poringland, Norwich, Norfolk,... |
#Save company data file
localcos.to_csv(companiesfilename,index=False)