# Data source (Electoral Commission): http://www.electoralcommission.org.uk/find-information-by-subject/elections-and-referendums/upcoming-elections-and-referendums/eu-referendum/electorate-and-count-information
# IPython shell escape (not plain Python): list files in the working
# directory whose names start with "EU".
!ls EU*
EU-referendum-result-data.csv EURef.ipynb
Load the data in:
import pandas as pd
# Load the official results CSV published by the Electoral Commission.
# The download stays the primary source; if it fails (offline, server
# error), fall back to the copy of the file saved alongside this notebook.
url='http://www.electoralcommission.org.uk/__data/assets/file/0014/212135/EU-referendum-result-data.csv'
local_csv='EU-referendum-result-data.csv'
try:
    df=pd.read_csv(url)
except OSError:
    # urllib's URLError/HTTPError and socket timeouts are all OSError
    # subclasses, so this covers the usual network failures without
    # hiding parsing errors.
    df=pd.read_csv(local_csv)
# Preview the first three rows.
df[:3]
id | Region_Code | Region | Area_Code | Area | Electorate | ExpectedBallots | VerifiedBallotPapers | Pct_Turnout | Votes_Cast | ... | Remain | Leave | Rejected_Ballots | No_official_mark | Voting_for_both_answers | Writing_or_mark | Unmarked_or_void | Pct_Remain | Pct_Leave | Pct_Rejected | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 108 | E12000006 | East | E06000031 | Peterborough | 120892 | 87474 | 87469 | 72.35 | 87469 | ... | 34176 | 53216 | 77 | 0 | 32 | 7 | 38 | 39.11 | 60.89 | 0.09 |
1 | 109 | E12000006 | East | E06000032 | Luton | 127612 | 84633 | 84636 | 66.31 | 84616 | ... | 36708 | 47773 | 135 | 0 | 85 | 0 | 50 | 43.45 | 56.55 | 0.16 |
2 | 112 | E12000006 | East | E06000033 | Southend-on-Sea | 128856 | 93948 | 93939 | 72.90 | 93939 | ... | 39348 | 54522 | 69 | 0 | 21 | 0 | 48 | 41.92 | 58.08 | 0.07 |
3 rows × 21 columns
Check the columns:
# Display the column labels of the loaded results table.
df.columns
Index(['id', 'Region_Code', 'Region', 'Area_Code', 'Area', 'Electorate', 'ExpectedBallots', 'VerifiedBallotPapers', 'Pct_Turnout', 'Votes_Cast', 'Valid_Votes', 'Remain', 'Leave', 'Rejected_Ballots', 'No_official_mark', 'Voting_for_both_answers', 'Writing_or_mark', 'Unmarked_or_void', 'Pct_Remain', 'Pct_Leave', 'Pct_Rejected'], dtype='object')
How close were the votes? Count the number of areas where the difference between the Remain and Leave percentages was less than 2 * halfwidth
(i.e. both vote shares lie within the band 50 +/- halfwidth).
# Half the width, in percentage points, of the band around 50% that we
# treat as a "close" result.
halfwidth=1
# Distance of each side's vote share from an even 50/50 split.
remain_gap=(df['Pct_Remain']-50).abs()
leave_gap=(df['Pct_Leave']-50).abs()
# Show only the areas where both shares fall strictly inside the band.
df[(remain_gap<halfwidth) & (leave_gap<halfwidth)]
id | Region_Code | Region | Area_Code | Area | Electorate | ExpectedBallots | VerifiedBallotPapers | Pct_Turnout | Votes_Cast | ... | Leave | Rejected_Ballots | No_official_mark | Voting_for_both_answers | Writing_or_mark | Unmarked_or_void | Pct_Remain | Pct_Leave | Pct_Rejected | spread | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
7 | 185 | E12000006 | East | E07000009 | East Cambridgeshire | 62435 | 48129 | 48120 | 77.08 | 48124 | ... | 24487 | 38 | 0 | 18 | 0 | 20 | 49.08 | 50.92 | 0.08 | -1.84 |
22 | 233 | E12000006 | East | E07000077 | Uttlesford | 64735 | 51973 | 51972 | 80.28 | 51972 | ... | 26324 | 29 | 0 | 5 | 0 | 24 | 49.32 | 50.68 | 0.06 | -1.36 |
24 | 252 | E12000006 | East | E07000096 | Dacorum | 108965 | 86313 | 86308 | 79.21 | 86307 | ... | 43702 | 63 | 4 | 24 | 0 | 35 | 49.33 | 50.67 | 0.07 | -1.34 |
25 | 254 | E12000006 | East | E07000098 | Hertsmere | 73295 | 56161 | 56159 | 76.62 | 56159 | ... | 28532 | 34 | 0 | 12 | 0 | 22 | 49.16 | 50.84 | 0.06 | -1.68 |
28 | 259 | E12000006 | East | E07000103 | Watford | 65060 | 46635 | 46635 | 71.68 | 46635 | ... | 23419 | 49 | 0 | 23 | 4 | 22 | 49.73 | 50.27 | 0.11 | -0.54 |
45 | 253 | E12000006 | East | E07000242 | East Hertfordshire | 106260 | 85446 | 85435 | 80.40 | 85433 | ... | 42994 | 67 | 0 | 26 | 5 | 36 | 49.64 | 50.36 | 0.08 | -0.72 |
49 | 90 | E12000004 | East Midlands | E06000017 | Rutland | 29390 | 22989 | 22986 | 78.20 | 22984 | ... | 11613 | 18 | 0 | 9 | 2 | 7 | 49.43 | 50.57 | 0.08 | -1.14 |
50 | 91 | E12000004 | East Midlands | E06000018 | Nottingham | 195394 | 120792 | 120792 | 61.82 | 120791 | ... | 61343 | 130 | 0 | 47 | 4 | 79 | 49.16 | 50.84 | 0.11 | -1.68 |
56 | 200 | E12000004 | East Midlands | E07000037 | High Peak | 72487 | 54864 | 54864 | 75.69 | 54864 | ... | 27717 | 31 | 0 | 10 | 3 | 18 | 49.45 | 50.55 | 0.06 | -1.10 |
61 | 287 | E12000004 | East Midlands | E07000131 | Harborough | 67420 | 54922 | 54918 | 81.44 | 54910 | ... | 27850 | 32 | 0 | 1 | 2 | 29 | 49.25 | 50.75 | 0.06 | -1.50 |
92 | 6 | E12000007 | London | E09000006 | Bromley | 231473 | 182570 | 182570 | 78.87 | 182570 | ... | 90034 | 138 | 0 | 43 | 0 | 95 | 50.65 | 49.35 | 0.08 | 1.30 |
127 | 54 | E12000001 | North East | E08000021 | Newcastle upon Tyne | 190735 | 129072 | 129072 | 67.67 | 129072 | ... | 63598 | 69 | 0 | 20 | 5 | 44 | 50.70 | 49.30 | 0.05 | 1.40 |
137 | 80 | E12000002 | North West | E06000050 | Cheshire West and Chester | 259878 | 193640 | 193633 | 74.51 | 193633 | ... | 98082 | 96 | 0 | 27 | 11 | 58 | 49.32 | 50.68 | 0.05 | -1.36 |
183 | 168 | S92000003 | Scotland | S12000020 | Moray | 71370 | 48148 | 48139 | 67.45 | 48139 | ... | 23992 | 33 | 0 | 16 | 3 | 14 | 50.13 | 49.87 | 0.07 | 0.26 |
216 | 180 | E12000008 | South East | E07000004 | Aylesbury Vale | 136235 | 106908 | 106895 | 78.46 | 106895 | ... | 53956 | 62 | 0 | 25 | 2 | 35 | 49.50 | 50.50 | 0.06 | -1.00 |
218 | 182 | E12000008 | South East | E07000006 | South Bucks | 52194 | 40755 | 40755 | 78.08 | 40755 | ... | 20647 | 31 | 0 | 10 | 5 | 16 | 49.30 | 50.70 | 0.08 | -1.40 |
226 | 241 | E12000008 | South East | E07000085 | East Hampshire | 90588 | 73971 | 73971 | 81.65 | 73967 | ... | 36576 | 45 | 0 | 12 | 1 | 32 | 50.52 | 49.48 | 0.06 | 1.04 |
248 | 327 | E12000008 | South East | E07000177 | Cherwell | 108342 | 81909 | 81912 | 75.60 | 81908 | ... | 41168 | 72 | 0 | 29 | 1 | 42 | 49.69 | 50.31 | 0.09 | -0.62 |
257 | 356 | E12000008 | South East | E07000211 | Reigate and Banstead | 103731 | 81200 | 81200 | 78.28 | 81200 | ... | 40980 | 39 | 1 | 9 | 1 | 28 | 49.51 | 50.49 | 0.05 | -0.98 |
260 | 359 | E12000008 | South East | E07000214 | Surrey Heath | 65569 | 52330 | 52330 | 79.81 | 52330 | ... | 26667 | 25 | 0 | 8 | 1 | 16 | 49.02 | 50.98 | 0.05 | -1.96 |
266 | 370 | E12000008 | South East | E07000225 | Chichester | 91659 | 71411 | 71406 | 77.91 | 71407 | ... | 36326 | 70 | 3 | 27 | 1 | 39 | 49.08 | 50.92 | 0.10 | -1.84 |
309 | 126 | W92000004 | Wales | W06000001 | Isle of Anglesey | 51445 | 37980 | 37981 | 73.82 | 37978 | ... | 19333 | 27 | 0 | 11 | 0 | 16 | 49.06 | 50.94 | 0.07 | -1.88 |
321 | 139 | W92000004 | Wales | W06000014 | Vale of Glamorgan | 95011 | 72348 | 72347 | 76.15 | 72348 | ... | 35628 | 39 | 0 | 12 | 0 | 27 | 50.73 | 49.27 | 0.05 | 1.46 |
327 | 145 | W92000004 | Wales | W06000021 | Monmouthshire | 71607 | 55670 | 55670 | 77.74 | 55670 | ... | 27569 | 40 | 19 | 0 | 0 | 21 | 50.44 | 49.56 | 0.07 | 0.88 |
354 | 58 | E12000005 | West Midlands | E08000025 | Birmingham | 707293 | 451422 | 451336 | 63.81 | 451316 | ... | 227251 | 614 | 0 | 311 | 17 | 286 | 49.58 | 50.42 | 0.14 | -0.84 |
368 | 315 | E12000003 | Yorkshire and The Humber | E07000165 | Harrogate | 119987 | 94669 | 94665 | 78.89 | 94653 | ... | 46374 | 68 | 2 | 25 | 3 | 38 | 50.97 | 49.03 | 0.07 | 1.94 |
376 | 52 | E12000003 | Yorkshire and The Humber | E08000019 | Sheffield | 396406 | 266954 | 266951 | 67.34 | 266951 | ... | 136018 | 198 | 0 | 76 | 6 | 116 | 49.01 | 50.99 | 0.07 | -1.98 |
380 | 68 | E12000003 | Yorkshire and The Humber | E08000035 | Leeds | 543033 | 387730 | 387730 | 71.39 | 387677 | ... | 192474 | 340 | 39 | 116 | 8 | 177 | 50.31 | 49.69 | 0.09 | 0.62 |
28 rows × 22 columns
Calculate the spread(?) - the difference between the Remain and Leave percentages.
# Signed margin in percentage points: Remain share minus Leave share, so a
# negative value means Leave was ahead in that area.
df['spread']=df['Pct_Remain'].sub(df['Pct_Leave'])
Plot the spread as a histogram, using quite tight bin widths.
# Fine-grained (200-bin) histogram of the signed spread on a wide canvas.
df['spread'].plot.hist(bins=200, figsize=(20,10))
<matplotlib.axes._subplots.AxesSubplot at 0xac7e030c>
How about a cumulative count of the absolute spread?
# Cumulative histogram of the size of the margin, ignoring which side won.
df['spread'].abs().plot.hist(bins=200, figsize=(20,10), cumulative=True)
<matplotlib.axes._subplots.AxesSubplot at 0xac85d12c>
Do some nicer plots?
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Plot a histogram and kernel density estimate of the signed spread.
# `sns.distplot` is deprecated in current seaborn releases; `histplot` with
# `kde=True, stat="density"` is its documented replacement, and `cut=3`
# keeps the KDE curve extending past the data as distplot did.
sns.histplot(df['spread'], kde=True, stat="density", kde_kws=dict(cut=3), color="m", bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0xacd9446c>
Crib from the seaborn gallery to do a multiplot.
# Two stacked panels sharing the x-axis so the spreads line up.
f, axes = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
# `sns.distplot` is deprecated in current seaborn releases, so use its
# documented `histplot` replacement.
# Top panel: raw counts only (the old `kde=False` behaviour).
sns.histplot(df['spread'], color="b", ax=axes[0], bins=200)
# Bottom panel: density-normalised histogram with a KDE overlay; `cut=3`
# keeps the KDE curve extending past the data as distplot did.
sns.histplot(df['spread'], kde=True, stat="density", kde_kws=dict(cut=3), color="m", ax=axes[1], bins=200)
<matplotlib.axes._subplots.AxesSubplot at 0xabf7634c>
How about the absolute spread?
# Same two-panel layout, but for the size of the margin regardless of winner.
f, axes = plt.subplots(2, 1, figsize=(7, 7), sharex=True)
abs_spread = df['spread'].abs()
# `sns.distplot` is deprecated in current seaborn releases, so use its
# documented `histplot` replacement.
# Top panel: raw counts only (the old `kde=False` behaviour).
sns.histplot(abs_spread, color="b", ax=axes[0], bins=100)
# Bottom panel: density-normalised histogram with a KDE overlay; `cut=3`
# keeps the KDE curve extending past the data as distplot did.
sns.histplot(abs_spread, kde=True, stat="density", kde_kws=dict(cut=3), color="m", ax=axes[1], bins=100)
<matplotlib.axes._subplots.AxesSubplot at 0xabc1488c>
How about a cumulative kernel density estimate of the absolute spread?
# Cumulative kernel density estimate of the unsigned margin.
sns.kdeplot(df['spread'].abs(), cumulative=True)
<matplotlib.axes._subplots.AxesSubplot at 0xabc1a2ac>
What if we count over the actual (signed) spread? What percentage of reporting areas voted leave (negative)?
# Cumulative kernel density estimate of the signed margin
# (Remain share minus Leave share).
signed_spread = df['spread']
sns.kdeplot(signed_spread, cumulative=True)
<matplotlib.axes._subplots.AxesSubplot at 0xab95f66c>