import pandas as pd, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import lzma,json,gzip
import urllib.request
import zipfile
# Prefer DEFLATE compression for the exported archive. zipfile can only
# deflate when the optional zlib module is importable, so fall back to
# storing entries uncompressed otherwise. Only ImportError is caught so that
# unrelated failures (or a keyboard interrupt) are not silently swallowed.
try:
    import zlib
    compression = zipfile.ZIP_DEFLATED
except ImportError:
    compression = zipfile.ZIP_STORED
from utils import party_color_links,\
get_link_color,\
party_normalizer,\
party_normalizer2,\
hu_country,\
get_photo,get_photos,get_url,\
load_file, save_local, load_local
# Load the lookup tables through the project's load_local helper.
groups=load_local('groups')
parties=load_local('parties')
# `names` is consulted further down for each label's 'Constituencies' and
# 'Groups' entries.
names=load_local('names')
# Select the 'fivethirtyeight' matplotlib style for all figures, then print
# the list of styles this matplotlib installation offers.
plt.style.use('fivethirtyeight')
print(plt.style.available)
['bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark-palette', 'seaborn-dark', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'seaborn', 'Solarize_Light2', 'tableau-colorblind10', '_classic_test']
# Draw one horizontal line per entry of party_color_links, coloured with its
# link colour, so the whole palette can be checked visually.
for row, party in enumerate(party_color_links):
    plt.plot([0, 1], [row, row], color=get_link_color(party), lw=3, label=party)
plt.legend(fontsize=8, loc=3, framealpha=1)
# Print "<key>: <colour>" for every entry of party_color_links.
for party in party_color_links:
    print(party + ':', get_link_color(party))
ALDE: #3755a3 ECR: #0454a4 ENF: #777777 N/A: #444444 EPP: #051ecb S&D: #fc0404 Greens: #04b404 ALDE RO: #459ccc DK: #1464ac Egyéb: #04632c Együtt PM: #1f3b17 FDGR: #961934 FIDESZ-KDNP: #fb6b04 Jobbik: #3cb25a LMP: #7dbb37 MSZP: #b4c0af PMP: #0484cc PNL: #046cab PNȚCD: #04843c PRM: #0d5ca4 PSD: #dc2c24 RMDSZ: #1e3c21 UNPR: #a8190f
# Same listing as above, but with the key passed through party_normalizer2
# before printing.
for party in party_color_links:
    print(party_normalizer2(party) + ':', get_link_color(party))
ALDE ⏩: #3755a3 ECR 🦁: #0454a4 ENF 🌐: #777777 N/A 👤: #444444 EPP ⭐️: #051ecb S&D 🔴: #fc0404 Greens 🌻: #04b404 ALDE RO 🕊️: #459ccc DK 🔵: #1464ac Egyéb ⭕️: #04632c Együtt PM ✳️: #1f3b17 FDGR ⚫️: #961934 FIDESZ-KDNP 🍊: #fb6b04 Jobbik ✅: #3cb25a LMP 🏃♂️: #7dbb37 MSZP 🌸: #b4c0af PMP 🍏: #0484cc PNL 🔶: #046cab PNȚCD ✳️: #04843c PRM 🔱: #0d5ca4 PSD 🌹: #dc2c24 RMDSZ 🌷: #1e3c21 UNPR 🦅: #a8190f
# Vote records; iterated below as eu_vt[country][year] -> sequence of vote dicts.
eu_vt=load_local('eu_vt')
# Indexed below as eu_allegiance[country][year][allegiance_type].
eu_allegiance=load_local('eu_allegiance')
# Maps a vote id to the dossier records that supply its subjects and
# geographical areas.
voteid_2_dossierid=load_local('voteid_2_dossierid')
# Top-level keys of eu_vt in sorted order.
countries=sorted(eu_vt)
Allegiance
def get_allegiance_matrix(key,vt,allegiance):
    """Tally how often each row entity voted with, or against, every other entity.

    Args:
        key: Field of a vote record that identifies the row entity (the
            callers in this file pass 'name', 'group' or 'party').
        vt: Iterable of vote records; each must provide 'outcome', 'vote'
            and ``key`` entries.
        allegiance: Mapping ``vote id -> {'For': [...], 'Against': [...]}``
            listing the entities on each side of that vote. Either side may
            be missing and is then treated as empty.

    Returns:
        ``{name1: {name2: stats}}`` where ``stats`` holds the counters
        'Same', 'Opposite' and 'Total' plus the ratios 'Same_perc' and
        'Opposite_perc', rounded to three decimals. Every row entity seen in
        ``vt`` gets a row, even when it never voted 'For' or 'Against'.
    """
    opposite_of = {'For': 'Against', 'Against': 'For'}
    allegiance_matrix = {}
    for record in vt:
        outcome = record['outcome']
        row = allegiance_matrix.setdefault(record[key], {})
        if outcome not in opposite_of:
            # Any other outcome (e.g. an abstention) contributes no counts.
            continue
        sides = allegiance[record['vote']]
        # Entities on the voter's own side count as 'Same', the others as
        # 'Opposite'. Both sides are read with .get so that a vote lacking
        # one side is handled the same way regardless of which side it is.
        for side, counter in ((outcome, 'Same'), (opposite_of[outcome], 'Opposite')):
            for name2 in sides.get(side, ()):
                stats = row.get(name2)
                if stats is None:
                    stats = row[name2] = {'Same': 0, 'Opposite': 0, 'Total': 0}
                stats['Total'] += 1
                stats[counter] += 1
    # A cell only exists once it has been counted, so 'Total' is never zero.
    for row in allegiance_matrix.values():
        for stats in row.values():
            stats['Same_perc'] = np.round(stats['Same'] / stats['Total'], 3)
            stats['Opposite_perc'] = np.round(stats['Opposite'] / stats['Total'], 3)
    return allegiance_matrix
# Raw geographical-area spellings and the normalised name each one maps to.
_COUNTRY_ALIASES = {
    'Atlantic Ocean area': 'Atlantic Ocean Area',
    'Baltic Sea area': 'Baltic Sea Area',
    'Former Yugoslav Republic of Macedonia': 'Macedonia',
    'Former Yugoslav Republic of Macedonia (FYROM)': 'Macedonia',
    'Kosovo under UNSCR 1244/1999': 'Kosovo',
    'Mediterranean Sea area': 'Mediterranean Sea Area',
    'Moldova, Republic': 'Moldova',
    'Montenegro, from 06/2006': 'Montenegro',
    'Serbia, from 06/2006': 'Serbia',
    'North Sea area': 'North Sea Area',
}


def clean_country(country):
    """Return the normalised spelling of ``country``; unknown names pass through unchanged."""
    return _COUNTRY_ALIASES.get(country, country)
# Short label for each subject prefix. Keys are exactly three characters long
# because clean_topic looks subjects up by their first three characters;
# several prefixes share one label. Prefixes missing from this table are
# returned unchanged by topic_converter.
topic_descriptions={
'1 E':'Citizenship',
'1.1':'Citizen rights',
'1.2':'Citizen rights',
'2 I':'Market',
'2.1':'Trade',
'2.2':'Free movement',
'2.3':'Free movement',
'2.4':'Free movement',
'2.5':'Financial',
'2.6':'Monopolies',
'2.7':'Taxation',
'2.8':'Cooperation',
'3.1':'Agriculture',
'3.2':'Transport',
'3.3':'Communication',
'3.4':'Industry',
'3.5':'Research',
'3.6':'Energy',
'3.7':'Environment',
'4 E':'Equality',
'4.1':'Equality',
'4.2':'Health',
'4.3':'Civil society',
'4.4':'Education',
'4.5':'Tourism',
'4.6':'Consumers protection',
'4.7':'Regional policy',
'5.0':'Economy',
'5.1':'Economy',
'5.2':'Euro',
'6 E':'Foreign policy',
'6.1':'Foreign policy',
'6.2':'Foreign trade',
'6.3':'Foreign development',
'6.4':'Foreign relations',
'6.5':'Foreign aid',
'7 A':'Schengen area',
'7.1':'Schengen area',
'7.3':'Public security',
'7.4':'Justice',
'7.9':'Justice',
'8 S':'EU members',
'8.1':'EU members',
'8.2':'EU members',
'8.3':'EU members',
'8.4':'EU institutions',
'8.5':'EU law',
'8.6':'EU institutions',
'8.7':'EU budget'
}
def topic_converter(topic3):
    """Return the label stored in topic_descriptions for ``topic3``, or ``topic3`` itself when it has none."""
    return topic_descriptions.get(topic3, topic3)
def clean_topic(topic):
    """Convert a subject string to its label, looking it up by its first three characters only."""
    prefix = topic[:3]
    return topic_converter(prefix)
def get_vote_topics(voteid):
    """Return the distinct topic labels of all dossier records linked to ``voteid``.

    Each record contributes the entries under its 'subject' key or, when that
    key is absent, under 'sujet'; records with neither key are ignored.
    """
    labels = set()
    for dossier in voteid_2_dossierid[voteid]:
        if 'subject' in dossier:
            raw_subjects = dossier['subject']
        elif 'sujet' in dossier:
            raw_subjects = dossier['sujet']
        else:
            continue
        labels.update(clean_topic(raw) for raw in raw_subjects)
    return list(labels)
def get_vote_areas(voteid):
    """Return the distinct, normalised geographical areas of all dossier records linked to ``voteid``."""
    found = set()
    for dossier in voteid_2_dossierid[voteid]:
        if 'geographical_area' not in dossier:
            continue
        found.update(clean_country(raw) for raw in dossier['geographical_area'])
    return list(found)
# Union of the topic labels of every vote id in voteid_2_dossierid.
all_vote_topics = set()
for v in voteid_2_dossierid:
    all_vote_topics.update(get_vote_topics(v))
all_vote_topics
{'Agriculture', 'Citizen rights', 'Citizenship', 'Civil society', 'Communication', 'Consumers protection', 'Cooperation', 'EU budget', 'EU institutions', 'EU law', 'EU members', 'Economy', 'Education', 'Energy', 'Environment', 'Equality', 'Euro', 'Financial', 'Foreign aid', 'Foreign development', 'Foreign policy', 'Foreign relations', 'Foreign trade', 'Free movement', 'Health', 'Industry', 'Justice', 'Market', 'Monopolies', 'Public security', 'Regional policy', 'Research', 'Schengen area', 'Taxation', 'Tourism', 'Trade', 'Transport'}
# Union of the geographical areas of every vote id in voteid_2_dossierid.
all_vote_areas = set()
for v in voteid_2_dossierid:
    all_vote_areas.update(get_vote_areas(v))
all_vote_areas
{'ACP Countries', 'Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Armenia', 'Atlantic Ocean Area', 'Australia', 'Austria', 'Azerbaijan', 'Baltic Sea Area', 'Bangladesh', 'Belarus', 'Belgium', 'Belize', 'Black Sea area', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burma', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo DR, ex-Zaire', 'Cook Islands', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Czech Republic', "Côte d'Ivoire", 'Denmark', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Eritrea', 'Estonia', 'Ethiopia', 'Faroe Islands', 'Finland', 'France', 'French Guiana', 'Gabon', 'Georgia', 'Germany FR', 'Ghana', 'Greece', 'Greenland', 'Grenada', 'Guadeloupe', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kiribati', 'Korea, Republic', 'Kosovo', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Liechtenstein', 'Lithuania', 'Macao', 'Macedonia', 'Madagascar', 'Malaysia', 'Maldives', 'Malta', 'Marshall Islands', 'Martinique', 'Mauritania', 'Mauritius', 'Mayotte', 'Mediterranean Sea Area', 'Mexico', 'Micronesia', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Netherlands', 'New Zealand', 'Nicaragua', 'Nigeria', 'North Sea Area', 'Norway', 'Pakistan', 'Palau', 'Palestine Authority', 'Panama', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Romania', 'Russian Federation', 'Réunion', 'Saint Lucia', 'Saint Vincent and Grenadines', 'Samoa', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa', 'South Korea', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Swaziland', 'Sweden', 'Switzerland', 'Syrian Arab Republic', 'Tajikistan', 'Thailand', 'Tibet', 
'Timor-Leste', 'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela', 'Vietnam', 'Western Sahara', 'Yemen', 'Zimbabwe'}
# Build eu_allegiance_matrix[country][year][area][topic][type1][type2], where
# each leaf is the result of get_allegiance_matrix for that slice of votes.
# Empty results are skipped, so no empty branches are created.
eu_allegiance_matrix = {}
allegiance_types = ['name', 'group', 'party']
for country in countries:
    for year in sorted(eu_vt[country]):
        print(country, year)
        # Bucket this year's votes by area and topic. Every vote also lands
        # in the catch-all 'All areas' / 'All topics' buckets.
        votes_area_topic = {}
        for vote in eu_vt[country][year]:
            topics = get_vote_topics(vote['vote'])
            areas = get_vote_areas(vote['vote'])
            topics.append('All topics')
            areas.append('All areas')
            for area in areas:
                area_bucket = votes_area_topic.setdefault(area, {})
                for topic in topics:
                    area_bucket.setdefault(topic, []).append(vote)
        for area in sorted(votes_area_topic):
            for topic in sorted(votes_area_topic[area]):
                for allegiance_type1 in allegiance_types:
                    for allegiance_type2 in allegiance_types:
                        dummy = get_allegiance_matrix(
                            allegiance_type1,
                            votes_area_topic[area][topic],
                            eu_allegiance[country][year][allegiance_type2],
                        )
                        if not dummy:
                            continue
                        branch = (
                            eu_allegiance_matrix
                            .setdefault(country, {})
                            .setdefault(year, {})
                            .setdefault(area, {})
                            .setdefault(topic, {})
                            .setdefault(allegiance_type1, {})
                        )
                        branch[allegiance_type2] = dummy
Hungary 2004 Hungary 2005 Hungary 2006 Hungary 2007 Hungary 2008 Hungary 2009 Hungary 2010 Hungary 2011 Hungary 2012 Hungary 2013 Hungary 2014 Hungary 2015 Hungary 2016 Hungary 2017 Hungary 2018 Hungary 2019 Joint 2004 Joint 2005 Joint 2006 Joint 2007 Joint 2008 Joint 2009 Joint 2010 Joint 2011 Joint 2012 Joint 2013 Joint 2014 Joint 2015 Joint 2016 Joint 2017 Joint 2018 Joint 2019 Romania 2004 Romania 2005 Romania 2006 Romania 2007 Romania 2008 Romania 2009 Romania 2010 Romania 2011 Romania 2012 Romania 2013 Romania 2014 Romania 2015 Romania 2016 Romania 2017 Romania 2018 Romania 2019
# Flatten eu_allegiance_matrix into a list of row dicts for export.
# The export is restricted to one statistic, the catch-all area and three years.
keys=['Same_perc']
areas=['All areas']
years=['2008','2013','2018']
# One dict per exported (name1, name2) pair.
eu_allegiance_list=[]
# Names collected per allegiance type while flattening (sets, sorted later).
clean_names={'name':set(),'group':set(),'party':set()}
for country in sorted(eu_allegiance_matrix):
# Disabled alternative: iterate every available year instead of `years`.
#for year in sorted(eu_allegiance_matrix[country]):
for year in years:
print(country,year)
# Disabled alternative: iterate every available area instead of `areas`.
#for area in sorted(eu_allegiance_matrix[country][year]):
for area in areas:
for topic in sorted(eu_allegiance_matrix[country][year][area]):
for allegiance_type1 in sorted(eu_allegiance_matrix[country][year][area][topic]):
for allegiance_type2 in sorted(eu_allegiance_matrix[country][year][area][topic][allegiance_type1]):
for name1 in sorted(eu_allegiance_matrix[country][year][area][topic][allegiance_type1][allegiance_type2]):
for name2 in sorted(eu_allegiance_matrix[country][year][area][topic][allegiance_type1][allegiance_type2][name1]):
# NOTE(review): indentation is not visible in this copy, so it is unclear
# whether only the two clean_names updates or also the row construction and
# append below are guarded by this self-pair check -- confirm.
if name1!=name2:
clean_names[allegiance_type1].add(name1)
clean_names[allegiance_type2].add(name2)
# Row dict: the coordinates of the pair plus one entry per statistic in `keys`.
dummy={'country':country,
'year':year,
'allegiance_type1':allegiance_type1,
'allegiance_type2':allegiance_type2,
'area':area,
'topic':topic,
'name1':name1,
'name2':name2}
for key in keys:
dummy[key]=eu_allegiance_matrix[country][year][area][topic]\
[allegiance_type1][allegiance_type2][name1][name2][key]
eu_allegiance_list.append(dummy)
Hungary 2008 Hungary 2013 Hungary 2018 Joint 2008 Joint 2013 Joint 2018 Romania 2008 Romania 2013 Romania 2018
# Sets are not JSON-serialisable, so replace each one with a sorted list
# before writing. The context manager closes the file deterministically
# instead of leaving the handle to the garbage collector.
for allegiance_type in clean_names:
    clean_names[allegiance_type] = sorted(clean_names[allegiance_type])
with open('ep/export/json/clean_names.json', 'w') as clean_names_file:
    clean_names_file.write(json.dumps(clean_names))
2869
# Dump the flat allegiance list as JSON. The context manager guarantees the
# file is flushed and closed before the zip step reads it back from disk.
with open('ep/export/ignore/eu_allegiance_list.json', 'w') as allegiance_file:
    allegiance_file.write(json.dumps(eu_allegiance_list))
146037860
# Pack the JSON dump into a zip archive as the single entry 'data.json'.
# Using ZipFile as a context manager finalises and closes the archive even
# when writing the entry raises.
with zipfile.ZipFile('ep/export/json/eu_allegiance_list.zip', mode='w') as zf:
    zf.write('ep/export/ignore/eu_allegiance_list.json', 'data.json', compress_type=compression)
# Hand both the flat list and the nested matrix to the project's save_local
# helper under the names 'allegiance_list' and 'allegiance_matrix'.
save_local(eu_allegiance_list,'allegiance_list')
save_local(eu_allegiance_matrix,'allegiance_matrix')
Clusterings
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
def dict_2_matrix(matrix,key,party_labels=False):
    """Convert a nested ``{name1: {name2: stats}}`` dict into a square array.

    Args:
        matrix: Nested allegiance dict. NOTE: it is modified in place -- every
            (row, column) pair of row labels that has no cell yet receives an
            all-zero stats dict.
        key: Name of the statistic to extract from each cell.
        party_labels: When true, a label found in the module-level ``names``
            table is rendered as ``'<label> | <party> | <group>'``.

    Returns:
        ``(array, labels)``: an ``n x n`` array whose rows and columns both
        follow the sorted row labels of ``matrix``, and the matching list of
        display labels. An empty ``matrix`` yields an empty array and list.
    """
    labels = sorted(matrix)
    slabels = []
    for label in labels:
        if party_labels and label in names:
            party = party_normalizer(names[label]['Constituencies'][0]['party'])
            group = party_normalizer(names[label]['Groups'][0]['groupid'])
            slabels.append(str(label) + u' | ' + str(party) + ' | ' + str(group))
        else:
            slabels.append(label)
    if not labels:
        return np.array([]), slabels
    # Take the statistic names from the first populated row. Rows can be
    # empty, so blindly sampling the very first row could raise IndexError.
    # If no row has any cell, fall back to the requested key alone.
    inner_keys = [key]
    for label in labels:
        row = matrix[label]
        if row:
            inner_keys = sorted(row[min(row)])
            break
    # Extend to a square matrix by zero-filling the missing cells.
    for name1 in labels:
        row = matrix[name1]
        for name2 in labels:
            if name2 not in row:
                row[name2] = {stat: 0 for stat in inner_keys}
    # Columns follow the row labels, so every row has the same length even
    # if a row happens to hold extra columns that are not row labels.
    values = [[matrix[name1][name2][key] for name2 in labels] for name1 in labels]
    return np.array(values), slabels
def hier_cluster(matrix,level,th=1,key='Same_perc',party_labels=False,method='single', metric='euclidean',criterion='distance'):
    """Hierarchically cluster the ``matrix[level][level]`` allegiance table.

    Args:
        matrix: Nested dict indexed as ``matrix[level][level]``; that entry is
            passed to dict_2_matrix.
        level: Allegiance type used for both rows and columns.
        th: Threshold passed to ``fcluster``.
        key: Statistic extracted from each cell.
        party_labels: Forwarded to dict_2_matrix.
        method, metric: Preferred ``linkage`` settings. If they fail, the call
            is retried with the euclidean metric and finally with single
            linkage plus the euclidean metric.
        criterion: Criterion passed to ``fcluster``.

    Returns:
        ``(linked, labels)`` where ``linked`` is the linkage matrix and every
        label has ``' | <cluster id>'`` appended; ``([], [])`` when there are
        fewer than two rows to cluster.

    Raises:
        Exception: whatever the last fallback attempt raised, if all three
            linkage attempts fail.
    """
    X, labelList = dict_2_matrix(matrix[level][level], key, party_labels)
    if len(X) <= 1:
        return [], []
    attempts = ((method, metric), (method, 'euclidean'), ('single', 'euclidean'))
    last_error = None
    for try_method, try_metric in attempts:
        try:
            linked = linkage(X, method=try_method, metric=try_metric)
        except Exception as err:
            # Best-effort fallback is kept, but KeyboardInterrupt and
            # SystemExit are no longer swallowed as a bare except would.
            last_error = err
        else:
            break
    else:
        raise last_error
    f = fcluster(linked, th, criterion)
    labelList = [label + ' | ' + str(cluster) for label, cluster in zip(labelList, f)]
    return linked, labelList
Extract clusters
def get_unique_parent_node(nodes_children,node):
    """Follow single-child links down from ``node`` until a leaf or a branching node is reached.

    Reads the module-level ``leafs`` collection to decide what counts as a
    leaf. ``nodes_children`` maps a node to the list of its children.
    """
    current = node
    while current not in leafs and len(nodes_children[current]) <= 1:
        current = nodes_children[current][0]
    return current
def get_unique_parent(node,node_dict,unique_node_set,root):
    """Return the nearest ancestor of ``node`` that belongs to ``unique_node_set``.

    ``node_dict`` maps each node to its parent. The chain of parents is
    climbed until one is found in ``unique_node_set``; if the chain ends
    first (a node with no entry in ``node_dict``), ``root`` is returned.
    """
    current = node
    while current in node_dict:
        parent = node_dict[current]
        if parent in unique_node_set:
            return parent
        current = parent
    return root
Save for VEGA
# Build, for every (country, year, area, topic, allegiance type), a pruned
# cluster tree and append its nodes to the flat list mpruned_nodes.
# NOTE(review): indentation is not visible in this copy; the comments below
# describe the statements in order and flag the places where nesting matters.
# NOTE(review): master_tree (here) and uid (below) are assigned but never read
# in the code shown.
master_tree={}
# Cluster counts 2 .. nc_levels-1 are requested from hier_cluster; tree level 1
# holds the country and level nc_levels holds the leaf name itself.
nc_levels=10
key='Same_perc'
mpruned_nodes=[]
for country in countries:
# Disabled alternative: iterate every available year instead of `years`.
#for year in eu_allegiance_matrix[country]:
for year in years:
print(country,year)
for area in eu_allegiance_matrix[country][year]:
for topic in eu_allegiance_matrix[country][year][area]:
for allegiance in eu_allegiance_matrix[country][year][area][topic]:
uid=country+year+allegiance+area+topic
cluster_list=[]
# name -> record holding the name, country and one 'cluster_level_<n>' entry per level.
clusterdummy={}
for nc in range(2,nc_levels):
# Positional arguments after the matrix: level=allegiance, th=nc, key,
# party_labels=True, method='complete', metric='seuclidean', criterion='maxclust'.
hc,hlabels=hier_cluster(eu_allegiance_matrix[country][year][area][topic],
allegiance,nc,key,True,'complete','seuclidean','maxclust')
for i in hlabels:
# hier_cluster labels are ' | '-joined: the name comes first, the cluster id last.
hi=i.split('|')
name=hi[0].strip()
cluster_no=hi[-1].strip()
if name not in clusterdummy:
clusterdummy[name]={}
clusterdummy[name]['name']=name
clusterdummy[name]['cluster_level_'+str(nc_levels)]=name
clusterdummy[name]['country']=country
clusterdummy[name]['cluster_level_1']=country
# Cluster node id: 'c' + requested cluster count + cluster number.
clusterdummy[name]['cluster_level_'+str(nc)]='c'+str(nc)+str(cluster_no)
cluster_list=list(clusterdummy.values())
# Construct the tree: one node per distinct level value, linked to the value
# one level above it.
# `leafs` is also read as a global by get_unique_parent_node.
leafs=sorted(clusterdummy)
# The root node (the country) carries no 'parent' entry.
nodes=[{'name':country}]
nodes_done=set()
nodes_children={}
for i in cluster_list:
for cluster_level in range(2,nc_levels+1):
node=i['cluster_level_'+str(cluster_level)]
parent=i['cluster_level_'+str(cluster_level-1)]
if node not in nodes_done:
dummy={}
nodes_done.add(node)
dummy['name']=node
dummy['parent']=parent
if parent not in nodes_children:nodes_children[parent]=[]
nodes_children[parent].append(node)
nodes.append(dummy)
# Get the unique nodes: collapse chains of single-child nodes onto the node
# where the chain ends (a leaf or a branching node).
node_dict={i['name']:i['parent'] for i in nodes[1:]}
unique_nodes={}
for node in nodes_children:
unique_nodes[node]=get_unique_parent_node(nodes_children,node)
unique_node_set=set(unique_nodes.values()).union(set(leafs))
# Prune: keep the root and the unique nodes, re-attaching each kept node to
# its nearest kept ancestor.
pruned_nodes=[]
for i in nodes:
# Alias, not a copy: changes made through `dummy` alter the dict stored in `nodes`.
dummy=i
name=i['name']
if 'parent' not in i:
pruned_nodes.append(i)
elif i['name'] in unique_node_set:
dummy['parent']=get_unique_parent(name,node_dict,unique_node_set,nodes[0]['name'])
if name in leafs:
if allegiance=='name':
dummy['party']=party_normalizer(names[name]['Constituencies'][0]['party'])
dummy['group']=party_normalizer(names[name]['Groups'][0]['groupid'])
dummy['party2']=party_normalizer2(names[name]['Constituencies'][0]['party'])
dummy['group2']=party_normalizer2(names[name]['Groups'][0]['groupid'])
else:
dummy['party']=''
dummy['group']=''
dummy['party2']=''
dummy['group2']=''
# NOTE(review): it is unclear whether image/url are set for leaves only or
# for every kept node -- confirm against the original indentation.
dummy['image']=get_photo(name,names,allegiance)
dummy['url']=get_url(name,names,allegiance)
pruned_nodes.append(dummy)
# Decorate the kept nodes with colours and the coordinates of this tree, then
# collect them in the flat export list.
for i in pruned_nodes:
dummy=i
if 'party' in dummy:
dummy['partycolor']=get_link_color(dummy['party'])
if 'group' in dummy:
dummy['groupcolor']=get_link_color(dummy['group'])
# NOTE(review): presumably the assignments below apply to every node, not
# only to those with a 'group' entry -- confirm against the original indentation.
dummy['country']=country
dummy['year']=year
dummy['area']=area
dummy['topic']=topic
dummy['allegiance']=allegiance
mpruned_nodes.append(dummy)
Hungary 2008 Hungary 2013 Hungary 2018 Joint 2008 Joint 2013 Joint 2018 Romania 2008 Romania 2013 Romania 2018
# Write the flat node list as JSON. The context manager closes the file
# deterministically instead of leaving the handle to the garbage collector.
with open('ep/export/json/topics.json', 'w') as topics_file:
    topics_file.write(json.dumps(mpruned_nodes))
17935513
# Hand the same node list to the project's save_local helper under the name 'topics'.
save_local(mpruned_nodes,'topics')