In [76]:
import pandas as pd, numpy as np, json, os
import matplotlib.pyplot as plt
%matplotlib inline
In [77]:
plt.style.use('seaborn-whitegrid')
plt.style.available
Out[77]:
['bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark-palette',
 'seaborn-dark',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'seaborn',
 'Solarize_Light2',
 'tableau-colorblind10',
 '_classic_test']
In [78]:
#!pip install d3IpyPlus
from d3IpyPlus import ScatterPlot, LinePlot, BarPlot, StackedArea
In [79]:
#!pip install pyecharts
In [80]:
m=json.loads(open('ignore/member_timelines.json','r').read())
members=json.loads(open('ignore/member_simple.json','r').read())
In [81]:
d=pd.DataFrame(members['deaths'],index=['v']).T
In [82]:
d['v']=' †'
In [83]:
s=pd.DataFrame(members['s2'],index=['s']).T
s=s.join(d)
s['v']=s['v'].fillna('')
s['s']=s['s']+s['v']
s=s[['s']]
s['w']=s['s'].str.split(' ').str[0].str[0]+s['s'].str.split(' ').str[-1].str[0]
In [84]:
p=pd.DataFrame(members['part'],index=['Part ']).T
p.head()
Out[84]:
Part
Zofota Severin | 1933-03-28 ⚪️ FSN - Frontul Salvării Naţionale
Comisel Constanta | 1934-04-30 ⚪️ FSN - Frontul Salvării Naţionale
Capatina Octavian-Dan | 1948-04-17 🕊️ PUNR - Partidul Unităţii Naţionale Române
Bangu Corvin-Laurentiu | Ismeretlen ⚪️ FSN - Frontul Salvării Naţionale
Rosculet Radu Voicu | 1939-10-24 📐 PNL - Partidul Naţional Liberal
In [85]:
szd_color='#E91C62'
In [86]:
pcolors=json.loads(open('json/pcolors.json','r').read())
p['Color']=p['Part '].str.split(' ').str[1].str.strip()
p['Part  ']=p['Part '].str.split('-').str[0].str.strip()
p=p.join(pd.DataFrame(pcolors,index=['cvalue']).T,on='Color')
In [87]:
g=pd.DataFrame(members['megye'],index=['Megye ']).T
g.head()
Out[87]:
Megye
Arama Viorel | 1938-03-10 🇷🇴 Bákó
Pop Petru | 1939-03-01 🇷🇴 Argeș
Les Ioan | 1947-09-01 🇷🇴 Szeben
Carp Mihai | 1956-05-20 🇷🇴 Bihar
Craciun Gheorghe | 1954-12-13 🇷🇴 Olt
In [88]:
def replace_all(text, dic):
    for i in dic:
        text = text.replace(i, dic[i])
    return text
In [89]:
medals={'🥈':2,'🥇':3,'🥉':1.5,'🏅':1.25,'🚩':1.5}
medals_none={i:'' for i in medals}

bizotts={'Parlamenti Iroda':2,
         'Állambiztonság':2,
         'Állami ügyek':1.5,
         'Választások':1.5,
         'Parlament':1.5,
         'Pénzügy':1.5,
         'Biztonság':1.5,
         '🇺🇳':2,
         '🇺🇸':1.5,
         '🇫🇷':1.5,
         '🇮🇱':1.5,
         '🇪🇸':1.5,
         '🇮🇹':1.5,
         '🇬🇧':1.5,
        '🏅🔼 Szenátus':2,
        '🏅🔽 Képviselőház':2,
        '🥇🔼 Szenátus':5,
        '🥇🔽 Képviselőház':5,
        '🥈🔼 Szenátus':4,
        '🥈🔽 Képviselőház':4,
        '🥉🔼 Szenátus':3,
        '🥉🔽 Képviselőház':3}
In [90]:
data={'Ország':{},'Bizottság':{},'Párt':{},'Megye':{}}
attention={}
for k,i in enumerate(m):
    if k%1000==0: print(k/len(m)*100,'%')
    name=i['Simple']
    if i['Típus'] in list(data.keys()):
        csoport=i['Csoportok']
        csoport=replace_all(csoport,medals_none)
        if csoport not in data[i['Típus']]:data[i['Típus']][csoport]={}
        for t in pd.date_range(i['start'],i['end']):
            st=str(t)[:10]
            if st not in data[i['Típus']][csoport]:data[i['Típus']][csoport][st]=set()
            data[i['Típus']][csoport][st].add(name)            
            if name not in attention:attention[name]={}
            if st not in attention[name]:attention[name][st]=0
            v=1
            for j in medals:
                if j in i['Csoportok']:
                    v*=medals[j]
            for j in bizotts:
                if j in i['Csoportok']:
                    v*=bizotts[j]
            attention[name][st]+=v
0.0 %
2.9080757262919126 %
5.816151452583825 %
8.724227178875738 %
11.63230290516765 %
14.540378631459564 %
17.448454357751476 %
20.35653008404339 %
23.2646058103353 %
26.172681536627213 %
29.08075726291913 %
31.98883298921104 %
34.89690871550295 %
37.804984441794865 %
40.71306016808678 %
43.62113589437869 %
46.5292116206706 %
49.43728734696251 %
52.345363073254426 %
55.25343879954634 %
58.16151452583826 %
61.06959025213017 %
63.97766597842208 %
66.88574170471398 %
69.7938174310059 %
72.70189315729782 %
75.60996888358973 %
78.51804460988164 %
81.42612033617355 %
84.33419606246547 %
87.24227178875738 %
90.15034751504929 %
93.0584232413412 %
95.96649896763311 %
98.87457469392503 %
In [91]:
df=pd.DataFrame(attention)
In [92]:
pd.to_datetime('2019-07-01')-pd.to_datetime('2018-01-24')
Out[92]:
Timedelta('523 days 00:00:00')
In [93]:
counts=df.count().sort_values(ascending=False)
In [94]:
counts.head(7)
Out[94]:
Marton Arpad-Francisc | 1955-03-25        10556
Verestoy Attila | 1954-03-01              10040
Dumitrescu Cristian-Sorin | 1955-04-24     9711
Seres Denes | 1953-07-21                   9711
Nicolicea Eugen | 1956-06-06               9704
Marko Bela | 1951-09-08                    9641
Kerekes Karoly | 1947-12-14                9634
dtype: int64
In [95]:
counts2=df.max().sort_values(ascending=False)
In [96]:
counts2={}
for i in df.columns:
    counts2[i]=df[i].last_valid_index()
counts2=pd.DataFrame(counts2,index=[0]).T[0]
In [97]:
counts2.head(7)
Out[97]:
Dutu Ion | 1942-10-07                 2000-07-10
Berciu Ion | 1940-01-17               2000-11-30
Otiman Paun-Ion | 1942-05-28          2004-11-30
Popa Mihaela | 1962-04-16             2016-12-21
Brezniceanu Alexandru | 1941-06-19    2000-11-30
Hoara Constantin Emil | 1942-08-22    1996-11-22
Marko Bela | 1951-09-08               2016-12-21
Name: 0, dtype: object
In [98]:
min_time=365*6
min_time1=365*4
min_dep=2
In [99]:
de=pd.DataFrame(counts).join(p)
de=de[de[0]>min_time1]
dd=de.groupby('Part ').nunique()
de=de.groupby('Part ').mean()
de=np.round(de.loc[dd[dd[0]>min_dep].index]/365,1)
de.columns=['Atlagos regiseg']
de=de.join(p.set_index('Part ').drop_duplicates())
dj=de.groupby(['Part  ','cvalue']).mean().reset_index().sort_values(by='Atlagos regiseg', ascending=False)
In [100]:
de
Out[100]:
Atlagos regiseg Color Part cvalue
Part
✳️ PNTCD - Partidul Naţional Ţărănesc Creştin Democrat 5.3 PNTCD ✳️ PNTCD #04843c
🌷 RMDSZ - Romániai Magyar Demokrata Szövetség 10.0 RMDSZ 🌷 RMDSZ green
🌹 PDSR - Partidul Democraţiei Sociale din România 5.3 PDSR 🌹 PDSR #dc2c24
🌹 PSD - Partidul Social Democrat 9.0 PSD 🌹 PSD #dc2c24
🌹 PSDR - Partidul Socialist Democratic din România 6.3 PSDR 🌹 PSDR #dc2c24
🌹 PSM - Partidul Socialist al Muncii 4.3 PSM 🌹 PSM #dc2c24
🍀 PER - Partidul Ecologist Român 7.2 PER 🍀 PER #69a54f
🍏 PMP - Partidul Mişcarea Populară 8.3 PMP 🍏 PMP #0484cc
👤 Independent - Independent 6.4 Independent 👤 Independent #555
📐 PNL - Partidul Naţional Liberal 7.6 PNL 📐 PNL #f4d42c
📐 PNL-CD - Partidul Naţional Liberal 5.1 PNL-CD 📐 PNL #f4d42c
🔱 PRM - Partidul România Mare 7.4 PRM 🔱 PRM #0d5ca4
🔶 PD - Partidul Democrat 7.3 PD 🔶 PD darkOrange
🔶 PDL - Partidul Democrat Liberal 5.7 PDL 🔶 PDL darkOrange
🕊️ ALDE - Partidul Alianţa Liberalilor şi Democraţilor 11.1 ALDE 🕊️ ALDE #046cab
🕊️ PC - Partidul Conservator 8.7 PC 🕊️ PC #96c2f2
🕊️ PUNR - Partidul Unităţii Naţionale Române 5.1 PUNR 🕊️ PUNR grey
🦅 UNPR - Uniunea Naţională pentru Progresul României 5.6 UNPR 🦅 UNPR #a8190f
In [101]:
sample_data = list(de.reset_index().T.to_dict().values())
scplot = BarPlot(
    x='Part  ', 
    y={'value':'Atlagos regiseg','label':'Atlagos regiseg (ev)'}, 
    id='Part  ', 
    aggs={'Atlagos regiseg':'mean'},
    text=' ', 
    color='cvalue', 
    order={'value':'Atlagos regiseg','sort':'desc'},
    tooltip=['Part '], 
    legend=False, 
    width='100%', 
)
scplot.draw(sample_data)
scplot.height='100%'
open('time1.html','w').write(scplot.dump_html(sample_data))
Out[101]:
3900
In [102]:
from IPython.display import IFrame
In [103]:
from pyecharts.charts import Bar, Grid
from pyecharts import options as opts
#docs https://pyecharts.org/#/en-us/
In [104]:
bar = (
    Bar(
        init_opts=opts.InitOpts(
            width='100%',
            height='410px'
        )
    )
    .add_xaxis(list(dj['Part  '].values))
    .add_yaxis('Átlagos régiség',
               [opts.BarItem(
                   name=i[1]['Part  '],
                   value=np.round(i[1]['Atlagos regiseg'],1),
                   itemstyle_opts=opts.ItemStyleOpts(color=i[1]['cvalue']),
                ) for i in dj.T.iteritems()]
              )
    .set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),
        title_opts=opts.TitleOpts(
            title="Parlamenterek átlagos régisége",
            pos_left='center',
            pos_top='20'
        ),
        xaxis_opts=opts.AxisOpts(
            type_='category',
            axislabel_opts =opts.LabelOpts(
                rotate=-50,
                font_weight='normal'
            )
        ),
#         graphic_opts=[
#                 opts.GraphicImage(
#                     graphic_item=opts.GraphicItem(
#                         id_="logo",
#                         right=50,
#                         top=50,
#                         z=-10,
#                         bounding="raw",
#                         origin=[75, 75],
#                     ),
#                     graphic_imagestyle_opts=opts.GraphicImageStyleOpts(
#                         image="https://szekelydata.csaladen.es/favicon.ico",
#                         width=25,
#                         height=25,
#                         opacity=0.8,
#                     ),
#                 )
#             ],
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(
        )
    )
    .render('time1e.html')
)
IFrame(src='time1e.html', width='98%', height=440)
Out[104]:
In [105]:
from plotly import offline as po
import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=list(dj['Part  '].values),
    y=np.round(np.array(dj['Atlagos regiseg'].values),1),
    marker=dict(
        color=list(dj['cvalue'].values)
    ),
)

data = [trace0]

fig = go.Figure(data=data)
file_name='time1c.html'
po.plot(fig, filename=file_name, auto_open=False);
IFrame(src=file_name, width='98%', height=420)
Out[105]:
In [106]:
de=pd.DataFrame(counts).join(g)
de=de[de[0]>min_time1]
dd=de.groupby('Megye ').nunique()
de=de.groupby('Megye ').mean()
de=np.round(de.loc[dd[dd[0]>min_dep].index]/365,1)
de.columns=['Atlagos regiseg']
dj=de.groupby(['Megye ']).mean().reset_index().sort_values(by='Atlagos regiseg', ascending=False)
In [107]:
ro_shape=json.loads(open('json/romania-counties.json','r').read())
megyek=json.loads(open('C:/users/csala/Onedrive/Github/universal/ro/hun_megyek.json','r').read())
megye_map={}
for i,di in enumerate(ro_shape['objects']['ROU_adm1']['geometries']):
    if (di['properties']['VARNAME_1']):
        megye_map[i]=di['properties']['VARNAME_1']
    else:
        megye_map[i]=di['properties']['NAME_1']
megye_map={megyek[megye_map[i]]:i for i in megye_map}
dj['id']=[megye_map[i]  if i in megye_map else -1 for i in dj['Megye '].str.split(' ').str[1].values]
open('json/megye_map.json','w').write(json.dumps(list(dj.T.to_dict().values())))
Out[107]:
3629
In [108]:
sample_data = list(de.reset_index().T.to_dict().values())
scplot = BarPlot(
    x='Megye ', 
    y={'value':'Atlagos regiseg','label':'Atlagos regiseg (ev)'}, 
    id='Megye ', 
    aggs={'Atlagos regiseg':'mean'},
    text=' ', 
#     color='cvalue', 
    order={'value':'Atlagos regiseg','sort':'desc'},
#     tooltip=['Part '], 
    legend=False, 
    width='100%',    
)
scplot.draw(sample_data)
scplot.height='100%'
open('time2.html','w').write(scplot.dump_html(sample_data))
Out[108]:
4089
In [109]:
bar = (
    Bar(
        init_opts=opts.InitOpts(
            width='100%',
            height='410px'
        )
    )
    .add_xaxis(list(dj['Megye '].values))
    .add_yaxis('Átlagos régiség',
               [opts.BarItem(
                   name=i[1]['Megye '],
                   value=np.round(i[1]['Atlagos regiseg'],1),
                   itemstyle_opts=opts.ItemStyleOpts(color=szd_color),
                ) for i in dj.T.iteritems()]
              )
    .set_global_opts(
        legend_opts=opts.LegendOpts(is_show=False),
        xaxis_opts=opts.AxisOpts(
            type_='category',
            axislabel_opts =opts.LabelOpts(
                rotate=-90,
                font_weight='normal'
            ),
#             offset=0
            
        ),
#         graphic_opts=[
#                 opts.GraphicImage(
#                     graphic_item=opts.GraphicItem(
#                         id_="logo",
#                         right=50,
#                         top=50,
#                         z=-10,
#                         bounding="raw",
#                         origin=[75, 75],
#                     ),
#                     graphic_imagestyle_opts=opts.GraphicImageStyleOpts(
#                         image="https://szekelydata.csaladen.es/favicon.ico",
#                         width=25,
#                         height=25,
#                         opacity=0.8,
#                     ),
#                 )
#             ],
    )
    .set_series_opts(
        label_opts=opts.LabelOpts(
        )
    )
    .render('time2e.html')
)
IFrame(src='time2e.html', width='98%', height=440)
Out[109]:
In [110]:
from plotly import offline as po
import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=list(dj['Megye '].values),
    y=np.round(np.array(dj['Atlagos regiseg'].values),1)
)

data = [trace0]

fig = go.Figure(data=data)
file_name='time2c.html'
po.plot(fig, filename=file_name, auto_open=False);
IFrame(src=file_name, width='98%', height=420)
Out[110]:
In [111]:
de=pd.DataFrame(counts2).join(p).join(d)
de=de.reset_index()
de=de[[str(i)=='nan' for i in list(de['v'].values)]] #keep only alive ones
de['szul']=de['index'].str.split('|').str[1].str.strip()
de=de[[str(i)!='Ismeretlen' for i in list(de['szul'].values)]] #keep only known birth dates
de['Eletkor']=(pd.to_datetime(de[0])-pd.to_datetime(de['szul'])).dt.days/365
dd=de.groupby('Part ').nunique()
de=de.groupby('Part ').mean()[['Eletkor']]
de=np.round(de.loc[dd[dd[0]>min_dep].index],0)
de.columns=['Atlagos eletkor']
de=de.join(p.set_index('Part ').drop_duplicates())
dj=de.groupby(['Part  ','cvalue']).mean().reset_index().sort_values(by='Atlagos eletkor', ascending=False)
In [112]:
sample_data = list(de.reset_index().T.to_dict().values())
scplot = BarPlot(
    x='Part  ', 
    y='Atlagos eletkor', 
    id='Part  ', 
    aggs={'Atlagos eletkor':'mean'},
   text=' ', 
    color='cvalue', 
    order={'value':'Atlagos eletkor','sort':'desc'},
    tooltip=['Part '], 
    legend=False, 
    width='100%', 
)
scplot.draw(sample_data)
scplot.height='100%'
open('time3.html','w').write(scplot.dump_html(sample_data))
Out[112]:
3847
In [113]:
from plotly import offline as po
import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=list(dj['Part  '].values),
    y=np.round(np.array(dj['Atlagos eletkor'].values),0),
    marker=dict(
        color=list(dj['cvalue'].values)
    ),
)

data = [trace0]

fig = go.Figure(data=data)
file_name='time3c.html'
po.plot(fig, filename=file_name, auto_open=False);
IFrame(src=file_name, width='98%', height=420)
Out[113]:
In [114]:
de=pd.DataFrame(counts2).join(g).join(d)
de=de.reset_index()
de=de[[str(i)=='nan' for i in list(de['v'].values)]] #keep only alive ones
de['szul']=de['index'].str.split('|').str[1].str.strip()
de=de[[str(i)!='Ismeretlen' for i in list(de['szul'].values)]] #keep only known birth dates
de['Eletkor']=(pd.to_datetime(de[0])-pd.to_datetime(de['szul'])).dt.days/365
dd=de.groupby('Megye ').nunique()
de=de.groupby('Megye ').mean()[['Eletkor']]
de=np.round(de.loc[dd[dd[0]>min_dep].index],0)
de.columns=['Atlagos eletkor']
dj=de.groupby(['Megye ']).mean().reset_index().sort_values(by='Atlagos eletkor', ascending=False)
In [115]:
ro_shape=json.loads(open('json/romania-counties.json','r').read())
megyek=json.loads(open('C:/users/csala/Onedrive/Github/universal/ro/hun_megyek.json','r').read())
megye_map={}
for i,di in enumerate(ro_shape['objects']['ROU_adm1']['geometries']):
    if (di['properties']['VARNAME_1']):
        megye_map[i]=di['properties']['VARNAME_1']
    else:
        megye_map[i]=di['properties']['NAME_1']
megye_map={megyek[megye_map[i]]:i for i in megye_map}
dj['id']=[megye_map[i]  if i in megye_map else -1 for i in dj['Megye '].str.split(' ').str[1].values]
open('json/megye_map2.json','w').write(json.dumps(list(dj.T.to_dict().values())))
Out[115]:
3670
In [116]:
sample_data = list(de.reset_index().T.to_dict().values())
scplot = BarPlot(
    x='Megye ', 
    y='Atlagos eletkor', 
    id='Megye ', 
    aggs={'Atlagos eletkor':'mean'},
    text=' ', 
#     color='cvalue', 
    order={'value':'Atlagos eletkor','sort':'desc'},
#     tooltip=['Part '], 
    legend=False, 
    width='100%',    
)
scplot.draw(sample_data)
scplot.height='100%'
open('time4.html','w').write(scplot.dump_html(sample_data))
Out[116]:
4091
In [117]:
from plotly import offline as po
import plotly.plotly as py
import plotly.graph_objs as go

trace0 = go.Bar(
    x=list(dj['Megye '].values),
    y=np.round(np.array(dj['Atlagos eletkor'].values),0)
)

data = [trace0]

fig = go.Figure(data=data)
file_name='time4c.html'
po.plot(fig, filename=file_name, auto_open=False);
IFrame(src=file_name, width='98%', height=420)
Out[117]:

Fusions

In [118]:
switches={}
fusions={}
for k,i in enumerate(m):
    if k%1000==0: print(k/len(m)*100,'%')
    name=i['Simple']
    if i['Típus'] in ['Megye','Párt']:
        for t in pd.date_range(i['start'],i['end']):
            st=str(t)[:10]
            if name not in switches:switches[name]={}
            if i['Típus']not in switches[name]:switches[name][i['Típus']]={'count':0,'current':i['Csoportok']}
            if switches[name][i['Típus']]['current']!=i['Csoportok']:
                p1=i['Csoportok']
                p2=switches[name][i['Típus']]['current']
                if i['Típus'] not in fusions:fusions[i['Típus']]={}
                if p1 not in fusions[i['Típus']]:fusions[i['Típus']][p1]={}
                if p2 not in fusions[i['Típus']][p1]:fusions[i['Típus']][p1][p2]=0
                fusions[i['Típus']][p1][p2]+=1
                switches[name][i['Típus']]['current']=str(i['Csoportok'])
                switches[name][i['Típus']]['count']+=1
0.0 %
2.9080757262919126 %
5.816151452583825 %
8.724227178875738 %
11.63230290516765 %
14.540378631459564 %
17.448454357751476 %
20.35653008404339 %
23.2646058103353 %
26.172681536627213 %
29.08075726291913 %
31.98883298921104 %
34.89690871550295 %
37.804984441794865 %
40.71306016808678 %
43.62113589437869 %
46.5292116206706 %
49.43728734696251 %
52.345363073254426 %
55.25343879954634 %
58.16151452583826 %
61.06959025213017 %
63.97766597842208 %
66.88574170471398 %
69.7938174310059 %
72.70189315729782 %
75.60996888358973 %
78.51804460988164 %
81.42612033617355 %
84.33419606246547 %
87.24227178875738 %
90.15034751504929 %
93.0584232413412 %
95.96649896763311 %
98.87457469392503 %
In [119]:
def get_sub(dz,x,k=1):
    part=dz.T[x].sort_values(ascending=False)
    print(part.mean())
    print(part.head(5))
    df=pd.DataFrame(part).join(pd.DataFrame(counts)).join(s)
    df['a']=df[x]/(df[0]/k)
    df=df.replace([np.inf, -np.inf], np.nan)
    df=df[df[0]>min_time] #remove outliers
    part=df.sort_values(by='a',ascending=False)[['a','s','w']]
    return part
In [120]:
dw=pd.DataFrame({i:{j:switches[i][j]['count'] for j in switches[i]} for i in switches})
part=get_sub(dw,'Párt',365)
megye=get_sub(dw,'Megye',365)
0.5197224251278305
Timis Ioan | 1951-09-17                 6
Gradinaru Nicolae | 1953-11-05          5
Ion Vasile | 1950-01-12                 5
Codirla Liviu | 1968-01-25              5
Iliescu Valentin Adrian | 1961-11-01    5
Name: Párt, dtype: int64
0.06829802775748722
Niculescu Duvaz Bogdan Nicolae | 1948-12-14    3
Banicioiu Nicolae | 1979-03-26                 3
Mihalcescu Carmen-Ileana | 1970-05-14          3
Nicolaescu Sergiu-Florin | 1930-04-13          3
Nicolae Serban | 1968-04-05                    2
Name: Megye, dtype: int64
In [121]:
top=15
fig,axes=plt.subplots(1,2,figsize=(11,3))
to_plot=part.set_index('s')['a']
ax=axes[0]
print(to_plot.mean())
ax=to_plot.head(top).plot(kind='bar',ax=ax,color=szd_color)
ax.set_xlabel("")
ax.set_ylabel("Pártváltás / év")
to_plot=megye.set_index('s')['a']
ax=axes[1]
print(to_plot.mean())
to_plot.head(top).plot(kind='bar',ax=ax,color=szd_color)
ax.set_xlabel("")
ax.set_ylabel("Megyeváltás / év")
plt.savefig('fig/valtas.png',bbox_inches='tight',transparent=True)
plt.show()
0.10610293643790332
0.022165929140400432

Fusions

In [122]:
fusions['part']=fusions['Párt']
fusions.pop('Párt');
open('json/fusions.json','w').write(json.dumps(fusions))
Out[122]:
14854
In [123]:
switches2={}
fusions2={}
for k,i in enumerate(m):
    if k%1000==0: print(k/len(m)*100,'%')
    name=i['Simple']
    if i['Típus'] in ['Párt']:
        if not ((i['Típus']=='Párt')and(i['Csoportok']=='👤 Independent - Independent')):        
            for t in pd.date_range(i['start'],i['end']):
                st=str(t)[:10]
                if name not in switches2:switches2[name]={}
                if i['Típus']not in switches2[name]:switches2[name][i['Típus']]={'count':0,'current':i['Csoportok']}
                if switches2[name][i['Típus']]['current']!=i['Csoportok']:
                    p1=i['Csoportok']
                    p2=switches2[name][i['Típus']]['current']
                    if i['Típus'] not in fusions2:fusions2[i['Típus']]={}
                    if p1 not in fusions2[i['Típus']]:fusions2[i['Típus']][p1]={}
                    if p2 not in fusions2[i['Típus']][p1]:fusions2[i['Típus']][p1][p2]=0
                    fusions2[i['Típus']][p1][p2]+=1
                    switches2[name][i['Típus']]['current']=str(i['Csoportok'])
                    switches2[name][i['Típus']]['count']+=1
0.0 %
2.9080757262919126 %
5.816151452583825 %
8.724227178875738 %
11.63230290516765 %
14.540378631459564 %
17.448454357751476 %
20.35653008404339 %
23.2646058103353 %
26.172681536627213 %
29.08075726291913 %
31.98883298921104 %
34.89690871550295 %
37.804984441794865 %
40.71306016808678 %
43.62113589437869 %
46.5292116206706 %
49.43728734696251 %
52.345363073254426 %
55.25343879954634 %
58.16151452583826 %
61.06959025213017 %
63.97766597842208 %
66.88574170471398 %
69.7938174310059 %
72.70189315729782 %
75.60996888358973 %
78.51804460988164 %
81.42612033617355 %
84.33419606246547 %
87.24227178875738 %
90.15034751504929 %
93.0584232413412 %
95.96649896763311 %
98.87457469392503 %
In [124]:
fusions2['part']=fusions2['Párt']
fusions2.pop('Párt');
open('json/fusions2.json','w').write(json.dumps(fusions2))
Out[124]:
7478

Orszag

In [125]:
countries={}
for k,i in enumerate(m):
    if k%1000==0: print(k/len(m)*100,'%')
    name=i['Simple']
    if i['Típus'] in ['Ország','Bizottság']:
        for t in pd.date_range(i['start'],i['end']):
            st=str(t)[:10]
            if name not in countries:countries[name]={}
            if i['Típus'] not in countries[name]:countries[name][i['Típus']]={}
            if i['Csoportok'] not in countries[name][i['Típus']]:countries[name][i['Típus']][i['Csoportok']]=0
            countries[name][i['Típus']][i['Csoportok']]+=1
0.0 %
2.9080757262919126 %
5.816151452583825 %
8.724227178875738 %
11.63230290516765 %
14.540378631459564 %
17.448454357751476 %
20.35653008404339 %
23.2646058103353 %
26.172681536627213 %
29.08075726291913 %
31.98883298921104 %
34.89690871550295 %
37.804984441794865 %
40.71306016808678 %
43.62113589437869 %
46.5292116206706 %
49.43728734696251 %
52.345363073254426 %
55.25343879954634 %
58.16151452583826 %
61.06959025213017 %
63.97766597842208 %
66.88574170471398 %
69.7938174310059 %
72.70189315729782 %
75.60996888358973 %
78.51804460988164 %
81.42612033617355 %
84.33419606246547 %
87.24227178875738 %
90.15034751504929 %
93.0584232413412 %
95.96649896763311 %
98.87457469392503 %
In [126]:
dz=pd.DataFrame({i:{j:sum(countries[i][j].values()) for j in countries[i]} for i in countries}).fillna(0)
dz2=pd.DataFrame({i:{j:len(countries[i][j].values()) for j in countries[i]} for i in countries}).fillna(0)
In [127]:
orsz1=get_sub(dz2,'Ország')
biz1=get_sub(dz2,'Bizottság')
3.5992753623188407
Iordache Florin | 1960-12-14    20.0
Movila Petru | 1967-10-26       19.0
Stanciu Anghel | 1949-08-15     18.0
Kerekes Karoly | 1947-12-14     18.0
Ion Vasile | 1950-01-12         18.0
Name: Ország, dtype: float64
2.321376811594203
Verestoy Attila | 1954-03-01              12.0
Zgonea Valeriu Stefan | 1967-09-03        12.0
Nicolicea Eugen | 1956-06-06              12.0
Dumitrescu Cristian-Sorin | 1955-04-24    11.0
Rusanu Dan Radu | 1950-05-25              11.0
Name: Bizottság, dtype: float64
In [128]:
orsz2=get_sub(dz,'Ország')
biz2=get_sub(dz,'Bizottság')
4342.807246376811
Kerekes Karoly | 1947-12-14     29925.0
Iordache Florin | 1960-12-14    26543.0
Peres Alexandru | 1952-12-20    26409.0
Movila Petru | 1967-10-26       26132.0
Tudose Mihai | 1967-03-06       26062.0
Name: Ország, dtype: float64
3404.0159420289856
Verestoy Attila | 1954-03-01          28577.0
Nicolicea Eugen | 1956-06-06          28122.0
Zgonea Valeriu Stefan | 1967-09-03    25524.0
Marton Arpad-Francisc | 1955-03-25    24135.0
Iordache Florin | 1960-12-14          23661.0
Name: Bizottság, dtype: float64
In [129]:
top=15
fig,axes=plt.subplots(1,2,figsize=(11,3))
to_plot=orsz2.set_index('s')['a']
ax=axes[0]
print(to_plot.mean())
to_plot.head(top).plot(kind='bar',ax=ax,color=szd_color)
ax.set_xlabel("")
ax.set_ylabel("Ország-barátság-csoport / év")
to_plot=biz2.set_index('s')['a']
ax=axes[1]
print(to_plot.mean())
to_plot.head(top).plot(kind='bar',ax=ax,color=szd_color)
ax.set_xlabel("")
ax.set_ylabel("Bizottság / év")
plt.savefig('fig/valtas2.png',bbox_inches='tight',transparent=True)
plt.show()
2.3183889232661565
1.8392387863791064

Scatter

In [130]:
part2=pd.DataFrame(part[['a','w','s']])
part2.columns=['Part','Nev','Nev2']
megye2=pd.DataFrame(megye['a'])
megye2.columns=['Megye']
sc1=part2.join(megye2).join(p).join(g).join(pd.DataFrame(counts/365,columns=['Evek']))
In [131]:
orsz3=pd.DataFrame(orsz2[['a','w','s']])
orsz3.columns=['Orszag','Nev','Nev2']
biz3=pd.DataFrame(biz2[['a']])
biz3.columns=['Bizottsag']
sc2=orsz3.join(biz3).join(p).join(g).join(pd.DataFrame(counts/365,columns=['Evek']))
In [132]:
fig,axes=plt.subplots(1,2,figsize=(9,4))
ax=axes[0]
sc1.plot(x='Part',y='Megye',kind='scatter',ax=ax,color=szd_color)
ax.set_xlabel('Pártváltás / év')
ax.set_ylabel('Megyeváltás / év')
ax=axes[1]
sc2.plot(x='Orszag',y='Bizottsag',kind='scatter',ax=ax,color=szd_color)
ax.set_xlabel('Ország-barátság-csoport / év')
ax.set_ylabel('Bizottság / év')
plt.savefig('fig/valtas3.png',bbox_inches='tight',transparent=True)
plt.show()
In [133]:
sample_data = list(sc1.reset_index().T.to_dict().values())
scplot = ScatterPlot(x={'value':'Part','label':'Partvaltas / ev'}, 
                     y={'value':'Megye','label':'Megyevaltas / ev'}, id='index', 
                     text='Nev', 
                     color='cvalue', 
#                      order={'value':'Evek','sort':'asc'},
                     tooltip=['Nev2', 'Part  ','Megye '], 
                     legend=False, width='100%', size='Evek')
scplot.draw(sample_data)
open('scatter1b.html','w').write(scplot.dump_html(sample_data))
Out[133]:
263160
In [134]:
sample_data = list(sc2.reset_index().T.to_dict().values())
scplot = ScatterPlot(x={'value':'Orszag','label':'Orszag-baratsag-csoport'}, 
                     y={'value':'Bizottsag','label':'Bizottsag'}, id='index', 
                     text='Nev', 
                     color='cvalue',  
#                      order={'value':'Evek','sort':'asc'},
                     tooltip=['Nev2', 'Part  ','Megye '], 
                     legend=False, width='100%', size='Evek')
scplot.draw(sample_data)
open('scatter2b.html','w').write(scplot.dump_html(sample_data))
Out[134]:
285243
In [ ]:
 
In [ ]: