#!/usr/bin/env python
# coding: utf-8

# Notebook export: load Hungarian MP membership timelines and build the
# per-day group-membership (`data`) and medal-attention (`attention`)
# structures, plus the recency-weighting table (`tanneal`) used by the
# edge-building cells below.  Cell markers (# In[...]) preserved.

# In[102]:

import pandas as pd, numpy as np, json, os
import networkx as nx
import operator
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

# In[103]:

# NOTE(review): the 'seaborn-whitegrid' style name was removed in
# matplotlib >= 3.6 (renamed 'seaborn-v0_8-whitegrid'); kept as-is for
# the environment this notebook was authored in — confirm before upgrading.
plt.style.use('seaborn-whitegrid')

# In[104]:

# Load the member timeline records; a context manager closes the file
# deterministically (the original leaked the handle from open().read()).
with open('ignore/member_timelines.json', 'r') as f:
    m = json.load(f)

# In[105]:

# Exploratory: distinct party group names.
{i['Csoportok'] for i in m if i['Típus'] == 'Párt'}

# In[1]:

# Exploratory: distinct chamber group names.
{i['Csoportok'] for i in m if i['Típus'] == 'Kamara'}

# In[133]:

# Exploratory: distinct committee group names.
{i['Csoportok'] for i in m if i['Típus'] == 'Bizottság'}

# In[106]:

def replace_all(text, dic):
    """Return *text* with every key of *dic* replaced by its value.

    Replacements run sequentially in dict iteration order, so a later
    replacement can act on the output of an earlier one.
    """
    for old in dic:
        text = text.replace(old, dic[old])
    return text

# In[75]:

# Medal emoji -> numeric weight; medals_none maps each medal to '' so
# group names can be stripped of medal markers before use as dict keys.
medals = {'🥈': 2, '🥇': 3, '🥉': 1.5, '🏅': 1.25, '🚩': 1.5}
medals_none = {i: '' for i in medals}

# In[111]:

# data[type][group][date]      -> set of member names active that day.
# attention[name][group][date] -> list of medal emojis present in the
#                                 member's original (unstripped) group name.
data = {'Ország': {}, 'Bizottság': {}, 'Párt': {}, 'Megye': {}}
attention = {}
for k, i in enumerate(m):
    if k % 1000 == 0:
        print(k / len(m) * 100, '%')
    name = i['Simple']
    if i['Típus'] in data:
        # Medal-free group name serves as the canonical group key.
        csoport = replace_all(i['Csoportok'], medals_none)
        data[i['Típus']].setdefault(csoport, {})
        for t in pd.date_range(i['start'], i['end']):
            st = str(t)[:10]  # ISO 'YYYY-MM-DD' day key
            data[i['Típus']][csoport].setdefault(st, set()).add(name)
            attention.setdefault(name, {}).setdefault(csoport, {}).setdefault(st, [])
            for j in medals:
                if j in i['Csoportok']:
                    attention[name][csoport][st].append(j)

# In[124]:

# Exploratory: inspect one raw record.
m[8]

# In[123]:

# Exploratory: distinct values of the 'Medals' field.
{i['Medals'] for i in m}

# In[77]:

def anneal(t, p=7):
    """Recency weight for date *t*: more recent dates weigh more.

    Computed as (2 - age_fraction)**p, where age_fraction is the share
    of the 1989-01-01..2020-01-01 span elapsed by *t*.
    """
    # pd.to_datetime('2020-01-01') - pd.to_datetime('1989-01-01') = 11322 days
    return (2 - (pd.to_datetime('2020-01-01') - pd.to_datetime(t)).days / 11322) ** p

# Per-day annealing factor, normalised against the 2020 value and
# offset by 0.1.  Keyed BOTH by Timestamp and by the 'YYYY-MM-DD'
# string, because later cells index it with the string form.
tanneal = {}
anneal2020 = anneal(pd.to_datetime('2020'))
for t in pd.date_range('1990', '2020'):
    tanneal[t] = anneal(t) / anneal2020 + 0.1
    tanneal[str(t)[:10]] = tanneal[t]

plt.subplots(1, 1, figsize=(5, 3))
plt.plot(tanneal.keys(), tanneal.values())
plt.ylabel('Elavulási együttható')
plt.show()

# In[78]:
# Accumulate pairwise co-membership edges.
#   edges  -> symmetric (undirected) totals:   {'values', 'anneal'}
#   edges2 -> asymmetric (directed) weights:   {'weight'}
edges={}
edges2={}
# Base point weight contributed by each membership type.
points={'Ország':1.5,'Bizottság':4,'Párt':2,'Megye':1}
for tipus in data:
    for csoport in data[tipus]:
        print(csoport)
        for t in data[tipus][csoport]:
            # Members active in this group on day t; every unordered
            # pair of them receives an edge contribution.
            names=list(data[tipus][csoport][t])
            for r in range(len(names)):
                name=names[r]
                for p in range(r+1,len(names)):
                    name2=names[p]
                    # Canonical (sorted) pair key for the undirected dict.
                    nset=tuple(np.sort([name,name2]))
                    v=points[tipus]
                    #symmetric
                    if nset not in edges:edges[nset]={'anneal':0,'values':0}
                    edges[nset]['values']+=v
                    # tanneal is keyed by 'YYYY-MM-DD' strings too, so t works here.
                    edges[nset]['anneal']+=v*tanneal[t]
                    #asymmetric
                    nset=tuple(np.sort([name,name2]))
                    # NOTE(review): attention[name][csoport][t] is a *list* of medal
                    # emojis as built in the earlier cell, so multiplying it by the
                    # (float) point weight — and again by tanneal[t] below — would
                    # raise TypeError as written.  Presumably a numeric medal score
                    # (e.g. derived from the `medals` weights) was intended here;
                    # confirm against the upstream data / an earlier notebook state.
                    v=points[tipus]*attention[name][csoport][t]
                    if nset not in edges2:edges2[nset]={'weight':0}
                    edges2[nset]['weight']+=v*tanneal[t]
                    nset=tuple(np.sort([name,name2]))
                    v=points[tipus]*attention[name2][csoport][t]
                    # Reversed pair key: edges2 holds both directions of each pair.
                    # (The nset assignment two lines up is immediately overwritten
                    # here — dead store, possibly a leftover.)
                    nset=tuple(np.sort([name,name2])[::-1])
                    if nset not in edges2:edges2[nset]={'weight':0}
                    edges2[nset]['weight']+=v*tanneal[t]

# Save

# In[86]:

import pickle

# In[87]:

def save_obj(obj, name ):
    # Pickle *obj* to ignore/<name>.pkl using the highest protocol.
    with open('ignore/'+ name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    # Load and return the object pickled at ignore/<name>.pkl.
    with open('ignore/' + name + '.pkl', 'rb') as f:
        return pickle.load(f)

# In[88]:

save_obj(edges,'plotter_name_edges')

# In[82]:

#edges=load_obj('plotter_name_edges')

# Weighted

# In[100]:

# Build a directed graph from the asymmetric weights; score nodes by
# edge-weighted PageRank normalised by the median score.
edges2l=[(i[0],i[1],edges2[i]) for i in edges2]
G = nx.DiGraph()
G.add_edges_from(edges2l)
nodes={}
r = nx.pagerank(G,weight='weight')
mr=np.median(list(r.values()))
for i in r:
    nodes[i]={'pr':r[i]/mr}
# links[src][dst] = weight, for the JSON export below.
links={}
for i in G.edges(data=True):
    name=i[0]
    name2=i[1]
    z=i[2]['weight']
    if name not in links:links[name]={}
    if name2 not in links:links[name2]={}
    links[name][name2]=z

# In[101]:

#save
network={'nodes':nodes,'links':links,'size':len(edges2l)}
print(len(nodes),'nodes and',len(G.edges()),'edges')
# NOTE(review): the handle from open(...) is never explicitly closed.
open('ignore/plotter_name_weighted.json','w').write(json.dumps(network))
import zipfile
with zipfile.ZipFile("json/plotter_name_weighted.zip", "w", compression=zipfile.ZIP_DEFLATED) as zf:
    zf.write("ignore/plotter_name_weighted.json",'plotter_name_weighted.json')

# Prune
# In[97]:

# Prune: keep only directed edges whose accumulated weight exceeds
# min_t, then rebuild the graph and PageRank scores on the reduced set.
min_t = 365 * 4 * 10  # weight threshold on accumulated edge weight
edges2l = [(i[0], i[1], edges2[i]) for i in edges2 if edges2[i]['weight'] > min_t]
G = nx.DiGraph()
G.add_edges_from(edges2l)

# Node score: edge-weighted PageRank, normalised by the median score.
r = nx.pagerank(G, weight='weight')
mr = np.median(list(r.values()))
nodes = {i: {'pr': r[i] / mr} for i in r}

# links[src][dst] = edge weight, for the JSON export below.
links = {}
for src, dst, attrs in G.edges(data=True):
    if src not in links:
        links[src] = {}
    if dst not in links:
        links[dst] = {}
    links[src][dst] = attrs['weight']

# In[99]:

# Save the pruned network as JSON plus a deflate-compressed zip copy.
network = {'nodes': nodes, 'links': links, 'size': len(edges2l)}
print(len(nodes), 'nodes and', len(G.edges()), 'edges')
# Context manager flushes/closes the file deterministically (the
# original leaked the handle from a bare open().write()).
with open('ignore/plotter_name_weighted_pruned.json', 'w') as f:
    f.write(json.dumps(network))
import zipfile
with zipfile.ZipFile("json/plotter_name_weighted_pruned.zip", "w",
                     compression=zipfile.ZIP_DEFLATED) as zf:
    zf.write("ignore/plotter_name_weighted_pruned.json", 'plotter_name_weighted_pruned.json')