#!/usr/bin/env python # coding: utf-8 # # Notes and Fragments on Graphs and Hypergraphs # # Start off with a few simple worked examples based on JJ's *Hypernetworks in the Science of Complex Systems*. # In[2]: #Use pandas and networkx import pandas as pd import networkx as nx import numpy as np import seaborn as sns get_ipython().run_line_magic('matplotlib', 'inline') # ## Simple Bipartite Graphs # # Example from p.33. # In[3]: # Create graph B = nx.Graph() B.add_edges_from([('a1','b1'),('a1','b2'), ('a1','b3'), ('a2','b2'),('a2','b3'), ('a2','b4'), ('a2','b5'), ('a2','b6'), ('a3','b5'),('a3','b6'), ('a3','b7'), ('a3','b8')]) # In[4]: #We can see the nodes in the gaph, and get index of a particular node in that list B.nodes(), B.nodes().index('a2'), B.nodes(True) # In[5]: # Bipartite graph has two non-intersecting sets of nodes # Edges go from one set to another X, Y = nx.bipartite.sets(B) X, Y # In[6]: # Simple chart of nodes def bipartite_plot(B, X=None, Y=None): if X is None and Y is None: X, Y = nx.bipartite.sets(B) #http://stackoverflow.com/a/27085151/454773 c = nx.bipartite.color(B) nx.set_node_attributes(B, 'group', c) pos = dict() pos.update( (n, (1, i)) for i, n in enumerate(X) ) # put nodes from X at x=1 pos.update( (n, (2, i)) for i, n in enumerate(Y) ) # put nodes from Y at x=2 colors=[] for i in B: if B.node[i]['group']: colors.append('lightblue') else: colors.append('pink') nx.draw(B, pos=pos,with_labels = True,node_color=colors) bipartite_plot(B, X, Y) # In[7]: #Neighbours / hyperedges B.neighbors('a1'), B.neighbors('b3') # In[8]: #Is there a path connecting two nodes? nx.has_path(B,'a1','a3') # In[9]: nx.shortest_path(B,'a1','a3'), nx.shortest_path_length(B,'a1','a3') # In[10]: for p in nx.all_simple_paths(B,'a1','a3'): print(p) # In[11]: #Projections onto one set: P1 = nx.bipartite.projected_graph(B, X) nx.draw(P1,with_labels = True) # In[12]: P2 = nx.bipartite.projected_graph(B, Y, multigraph=True) #Simple plotter doesn't reflect multi-edges though... nx.draw(P2,with_labels = True) # In[13]: #We can also project onto a multigraph to see the separate means by which nodes are connected P1= nx.bipartite.projected_graph(B, X, multigraph=True) PN = P1 if 'b1' in P1 else P2 PN['b6'], [(k, list(PN['b6'][k].keys())) for k in PN['b6'].keys() ] # In[14]: #Find nodes connected to a particular node via one or more in the other set PN['b6'], list(PN['b6'].keys()) # ## Displaying Adjacency Matrices # In[15]: #SparseDataFrame map from http://stackoverflow.com/a/17819427/454773 import numpy as np def pretty_matrix(B, X=None, Y=None): if X is None and Y is None: X, Y = nx.bipartite.sets(B) m=nx.adjacency_matrix(B) df=pd.SparseDataFrame([ pd.SparseSeries(m[i].toarray().ravel()) for i in np.arange(m.shape[0]) ]) dfa=df[[B.nodes().index(x) for x in X ]] dfa.columns=X dfa=dfa.iloc[[B.nodes().index(y) for y in Y ]] return dfa[sorted(dfa.columns)].rename({B.nodes().index(y):y for y in Y}).sort_index() # In[16]: pretty_matrix(B) # In[17]: def pretty_bipartite_matrix(B, X=None, Y=None): if X is None and Y is None: X, Y = nx.bipartite.sets(B) #There is a nx.bipartite.biadjacency_matrix(B, Y, X) function, but I don't see how to match index and label? m=nx.bipartite.biadjacency_matrix(B, Y, X) m2=pd.SparseDataFrame([ pd.SparseSeries(m[i].toarray().ravel()) for i in np.arange(m.shape[0]) ]) #Is this guaranteed to work correctly with the orderings? m2.columns=X return m2[sorted(m2.columns)].rename({[c for c in Y].index(y):y for y in Y}).sort_index() # In[18]: pretty_bipartite_matrix(B, X, Y) # ## Creating Graphs From Sets # # For example, p.34-36 # In[19]: Rsets= {'mouse':{'tiny','brown','quadruped','vegetarian'}, 'hare':{'small','brown','quadruped','vegetarian'}, 'deer':{'large','brown','quadruped','vegetarian','hooves','antlers'}, 'camel':{'large','brown','quadruped','vegetarian','hooves','hump'}, 'tiger':{'large','quadruped'}, 'falcon':{'small'}, 'chimpanzee':{'large','vegetarian'} } # In[20]: def bipartite_graphBuilder(R,B=None,undirected=True): if B is None: if undirected: B=nx.Graph() else: B=nx.DiGraph() for key in R: for descriptor in R[key]: B.add_edge(key,descriptor) return B # In[21]: B2=bipartite_graphBuilder(Rsets) bipartite_plot(B2) # In[22]: #If we ask about an item in one set, we get as dict keys the things it's connnected to in the other B2['deer'] # In[23]: # This means we can easily find intersecting connected properties of items in one set connected from the other def R(B, keys): setlist=[set(B[key].keys()) for key in keys] return set.intersection(*setlist) R(B2, ['hare','deer']) # In[24]: R(B2, R(B2, ['hare','deer']) ) # In[25]: R(B2, R(B2, {'hare','deer'}) ) # ### Galois Pairs # # p. 36 # In[26]: #Galoir pairs def GaloisPair(B,vals): return (vals, R(B, vals)) # In[27]: GaloisPair(B2,{'brown','quadruped','vegetarian'}) # In[28]: GaloisPair(B2,{'camel', 'deer', 'hare', 'mouse'}) # In[29]: pretty_bipartite_matrix(B2) # ## Q-Analysis # # pp. 51-52 # In[30]: Qsets={ 'Pete':{'gaming','pubs','cars','sport'}, 'Sam':{'pubs','cars','sport','fashion'}, 'Sue':{'fashion','painting','history','literature'}, 'Jane':{'history','literature','gardening','cooking'}, 'Tim':{'gardening','cooking','nature','science'} } # In[31]: QG=bipartite_graphBuilder(Qsets) bipartite_plot(QG) # In[32]: QG.neighbors('Pete') # In[33]: nx.shortest_path(QG,'Pete','Sam') # In[34]: for p in nx.all_shortest_paths(QG,'Pete','Sam'): print(p) # In[35]: #Construct a function based on a literal interpretation of the graph def Qnear(B,x,y): #Treat returning -1 as an error code? return len([e for e in nx.all_shortest_paths(QG,x,y) if len(e)==3])-1 Qnear(QG,'Pete','Sam') # In[36]: Qnear(QG,'Sam','Sue'), Qnear(QG,'Sue','Jane'), Qnear(QG,'Jane','Tim') # In[37]: #We can then look for Q-connected items - other than the singletons import itertools def componentFinder(setval): #Make a graph from connected pairs then find components across the graph T=nx.Graph() sets=[] for (x,y) in setval: T.add_edge(x,y) return [c for c in nx.connected_components(T)] #This is not the same as in the example - the singletons (connected to themselves) are ignored def Qconnected(B,N,X=None,anchor=None): ''' q-connected for q==N ''' def allcombos(B,X,N): qconnectedN=[] for (x,y) in itertools.combinations(X,2): if Qnear(B,x,y)>=N: qconnectedN.append((x,y)) return qconnectedN setval=None if X is None: X, Y = nx.bipartite.sets(B) if anchor is not None and anchor in Y: setval=allcombos(B,Y,N) else: setval=allcombos(B,X,N) return componentFinder(setval) # In[38]: Qconnected(QG,1,anchor="Sam") # In[39]: Qconnected(QG,0,anchor="Sam") # In[40]: Qconnected(QG,2,anchor="Sam") # In[41]: Qconnected(QG,3,anchor="Sam") # ### An Improved Model # # We could hack the above to also return as single set items members in the superset that aren't already returned, but that's a fudge. So how can we do it properly? # # p. 60-61 suggests a matrix calculation route. # # We can get the same (I think?) if we use the bipartite "biadjency" matrix that describes adjacent (connected) nodes in the projected bipartite graph. # In[165]: def nodematrix(B, anchor=None, X=None, Y=None): if X is None and Y is None: X, Y = nx.bipartite.sets(B) if anchor is not None and anchor in X: X,Y=Y,X mxb=nx.bipartite.biadjacency_matrix(B, Y, X) mx=mxb * mxb.T m2=pd.SparseDataFrame([ pd.SparseSeries(mx[i].toarray().ravel()) for i in np.arange(mx.shape[0]) ])-1 m2.columns=Y m2=m2[sorted(m2.columns)].rename({[c for c in Y].index(y):y for y in Y}).sort_index() return m2.replace(-1, np.nan) # In[43]: #This could be used to give us the singletons? NM=nodematrix(QG,anchor="Sam") NM # In[44]: nodematrix(QG,anchor="cars") # In[45]: def anotherQnear(NM,x,y): return NM.ix[x][y] # In[46]: anotherQnear(NM,'Sam','Pete') # In[47]: anotherQnear(NM,'Sam','Sam') # In[48]: def anotherQconnected(NM,N,X=None,anchor=None): ''' q-connected for q==N ''' def allcombosDiag(NM,X,N): qconnectedN=[] combos=[x for x in itertools.combinations(X,2)]+[(x,x) for x in X] for (x,y) in combos: if anotherQnear(NM,x,y)>=N: qconnectedN.append((x,y)) return qconnectedN setval=None if X is None: X = list(NM.columns) if anchor is not None and anchor in Y: setval=allcombosDiag(NM,Y,N) else: setval=allcombosDiag(NM,X,N) return componentFinder(setval) # In[49]: anotherQconnected(NM,2,anchor='Sam') # In[50]: anotherQconnected(NM,3,anchor='Sam') # In[51]: anotherQconnected(NM,1,anchor='Sam') # In[52]: anotherQconnected(NM,0,anchor='Sam') # ### Directed Graphs # # Trading network - directed graph, p. 52-53 # # I'm not sure about the sense of these, in that the information flows one way, so can something be connected to another thing going one way, but not the other? e.g. can we say *x* is `qnear` *y* but *y* is not `qnear` *x*? I need to go back and read the definitions again... # In[53]: countries=['Argentina','Barbados','Bolivia','Brazil','Chile','Colombia','Ecuador', 'Trinidad-Tobago','Paraguay', 'Peru','Uruguay','Venezuela'] countrycodes=[x[:3] for x in countries] print(countrycodes) # In[54]: Tlist={ 'Argentina': [1,0,1,1,1,0,0,0,1,0,1,0], 'Barbados': [0,1,0,0,0,0,0,1,0,0,0,0], 'Bolivia': [0,0,1,0,0,0,0,0,0,0,0,0], 'Brazil': [1,1,1,1,1,1,1,1,1,1,1,1], 'Chile': [1,0,1,0,1,0,0,0,1,1,1,0], 'Colombia': [0,0,1,0,1,1,1,0,0,1,0,1], 'Ecuador': [0,0,0,0,0,1,1,0,0,1,0,0], 'Trinidad-Tobago': [0,1,0,0,0,0,0,1,0,0,0,0], 'Paraguay': [0,0,0,0,0,0,0,0,1,0,0,0], 'Peru': [0,0,1,0,0,0,0,0,0,1,0,0], 'Uruguay': [0,0,0,0,0,0,0,0,0,0,1,0], 'Venezuela': [0,0,1,0,1,1,1,1,1,1,0,1] } Tset={k:[c[1] for c in zip(Tlist[k],countrycodes) if c[0]] for k in Tlist} Tset # In[55]: BD=bipartite_graphBuilder(Tset,undirected=False) bipartite_plot(BD) # In[56]: pretty_matrix(BD, Y=countries, X=countrycodes) # In[57]: ND=nodematrix(BD,anchor="Peru") ND # In[58]: anotherQconnected(ND,7,anchor='Brazil'), anotherQconnected(ND,6,anchor='Brazil') # In[59]: anotherQconnected(ND,5,anchor='Brazil'), anotherQconnected(ND,3,anchor='Brazil') # In[60]: anotherQconnected(ND,2,anchor='Brazil') # In[61]: anotherQconnected(ND,1,anchor='Brazil') # In[62]: anotherQconnected(ND,0,anchor='Brazil') # #### Remove a Node - Brazil # In[78]: BD.remove_node('Brazil') NDx=nodematrix(BD,anchor="Peru") NDx # In[80]: anotherQconnected(NDx,7,anchor='Chile'), anotherQconnected(NDx,6,anchor='Chile') # In[81]: anotherQconnected(NDx,5,anchor='Chile') # In[82]: anotherQconnected(NDx,4,anchor='Chile') # In[83]: anotherQconnected(NDx,3,anchor='Chile') # In[84]: anotherQconnected(NDx,2,anchor='Chile') # In[85]: anotherQconnected(NDx,1,anchor='Chile') #This is different to JJ's? # In[86]: anotherQconnected(NDx,0,anchor='Chile') # ### How about importing countries? # What happens if we reverse the edges and rerun the analysis? Is that the sense of this (p54)? # # Sense of reading the arrows is now more along lines of "gives money to, for exports"? # In[87]: Tset2={c:[] for c in countrycodes} for c in Tset: for c2 in Tset[c]: Tset2[c2].append(c) Tset2 # In[115]: BD2=bipartite_graphBuilder(Tset2,undirected=False) bipartite_plot(BD2) # In[116]: ND2=nodematrix(BD2,anchor="Per") ND2 # In[117]: anotherQconnected(ND2,6,anchor='Bra') # In[118]: anotherQconnected(ND2,5,anchor='Bra') # In[119]: anotherQconnected(ND2,4,anchor='Bra') # In[120]: anotherQconnected(ND2,3,anchor='Bra') # Differs from JJ's? # In[121]: anotherQconnected(ND2,2,anchor='Bra') # In[122]: anotherQconnected(ND2,1,anchor='Bra') # In[126]: GaloisPair(BD2,{'Bol','Chi','Per'}) # ### Eccentricity # # p. 59 # In[127]: glasses={'g1':{'ThinStem','Small','TulipShape','Narrow','Curved'}, 'g2':{'ThinStem','Tall','CupShape','Wide','Curved','Logo'}, 'g3':{'ThinStem','Tall','TulipShape','Narrow','Curved'}, 'g4':{'FatStem','Tall','VeeShape','Narrow','Straight'}, 'g5':{'FatStem','Small','VeeShape','Narrow','Straight'}, 'g6':{'ThinStem','Small','TubeShape','Narrow','Straight'} } GL=bipartite_graphBuilder(glasses) bipartite_plot(GL) # In[159]: def _ecc(B,x,y): return len(set(B[x].keys())-set(B[y].keys())) / len(B[x].keys()) # In[160]: _ecc(GL,'g1','g3'), _ecc(GL,'g2','g3'), _ecc(GL,'g3','g1'), _ecc(GL,'g4','g5'), _ecc(GL,'g5','g4'), _ecc(GL,'g6','g1') # In[161]: def _eccg(B,x): X, Y = nx.bipartite.sets(B) if x in Y: X,Y=Y,X return min([_ecc(B,x,z) for z in X if z!=x]) # In[162]: _eccg(GL,'g1'), _eccg(GL,'g2'), _eccg(GL,'g3'), _eccg(GL,'g4'), _eccg(GL,'g5'), _eccg(GL,'g6') # In[163]: def ecc(B,x,y=None): if y is None: return _eccg(B,x) return _ecc(B,x,y) # In[187]: ecc(GL,'g1'), ecc(GL,'g2','g3') # In[188]: def qAnalysis(B,anchor): X, Y = nx.bipartite.sets(B) if anchor is not None and anchor in X: X,Y=Y,X N=nodematrix(B,anchor=anchor, X=X, Y=Y) S=len(Y) for i in range(S-1,-1, -1): print('{}: {}'.format(i, anotherQconnected(N,i,anchor=anchor))) for y in sorted(list(Y)): print('Eccentricity of {}: {}'.format(y, ecc(B,y))) # In[189]: qAnalysis(GL,'g1') # In[ ]: