!pip install -q stellargraph !pip install -q node2vec import pandas as pd import networkx as nx from node2vec import Node2Vec from node2vec.edges import HadamardEmbedder import matplotlib.pyplot as plt from sklearn import metrics from stellargraph.data import EdgeSplitter from sklearn.ensemble import RandomForestClassifier def draw_graph(G, node_names={}, node_size=500): pos_nodes = nx.spring_layout(G) nx.draw(G, pos_nodes, with_labels=True, node_size=node_size, edge_color='gray', arrowsize=30) pos_attrs = {} for node, coords in pos_nodes.items(): pos_attrs[node] = (coords[0], coords[1] + 0.08) nx.draw_networkx_labels(G, pos_attrs, font_family='serif', font_size=20) plt.axis('off') axis = plt.gca() axis.set_xlim([1.2*x for x in axis.get_xlim()]) axis.set_ylim([1.2*y for y in axis.get_ylim()]) plt.show() edges = [[1,3],[2,3],[2,4],[4,5],[5,6],[5,7]] G = nx.from_edgelist(edges) draw_graph(G) # Resource allocation index - index-based method preds = nx.resource_allocation_index(G,[(1,2),(2,7),(4,6)]) print(list(preds)) # Jaccard coefficient - another index-based method preds = nx.jaccard_coefficient(G,[(1,2),(2,7),(4,6)]) print(list(preds)) G.nodes[1]["community"] = 0 G.nodes[2]["community"] = 0 G.nodes[3]["community"] = 0 G.nodes[4]["community"] = 1 G.nodes[5]["community"] = 1 G.nodes[6]["community"] = 1 G.nodes[7]["community"] = 1 # Community common neighbor - community-based method preds = nx.cn_soundarajan_hopcroft(G,[(1,2),(2,7),(4,6)]) print(list(preds)) # # Community resource allocation - another community-based method preds = nx.ra_index_soundarajan_hopcroft(G,[(1,2),(2,7),(4,6)]) print(list(preds)) !wget https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz !tar -xvf cora.tgz edgelist = pd.read_csv("./cora/cora.cites", sep='\t', header=None, names=["target", "source"]) G = nx.from_pandas_edgelist(edgelist) draw_graph(G) edgeSplitter = EdgeSplitter(G) graph_test, samples_test, labels_test = edgeSplitter.train_test_split( p=0.1, method="global" ) edgeSplitter = EdgeSplitter(graph_test, G) graph_train, samples_train, labels_train = edgeSplitter.train_test_split( p=0.1, method="global" ) node2vec = Node2Vec(graph_train) model = node2vec.fit() edges_embs = HadamardEmbedder(keyed_vectors=model.wv) train_embeddings = [edges_embs[str(x[0]),str(x[1])] for x in samples_train] test_embeddings = [edges_embs[str(x[0]),str(x[1])] for x in samples_test] rf = RandomForestClassifier(n_estimators=1000) rf.fit(train_embeddings, labels_train); y_pred = rf.predict(test_embeddings) print('Precision:', metrics.precision_score(labels_test, y_pred)) print('Recall:', metrics.recall_score(labels_test, y_pred)) print('F1-Score:', metrics.f1_score(labels_test, y_pred))