!pip install -q stellargraph
!pip install -q node2vec

import pandas as pd
import networkx as nx
from node2vec import Node2Vec
from node2vec.edges import HadamardEmbedder
import matplotlib.pyplot as plt
from sklearn import metrics
from stellargraph.data import EdgeSplitter
from sklearn.ensemble import RandomForestClassifier

def draw_graph(G, node_names=None, node_size=500):
    """Draw ``G`` with a spring layout and an extra label above every node.

    Args:
        G: NetworkX graph to draw.
        node_names: Optional mapping ``node -> label`` used for the labels
            placed above the nodes. When omitted or empty, every node is
            labelled with its own identifier.
        node_size: Marker size forwarded to ``nx.draw``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    pos_nodes = nx.spring_layout(G)
    nx.draw(G, pos_nodes, with_labels=True, node_size=node_size, edge_color='gray', arrowsize=30)

    # Shift the second set of labels upward so it does not sit on top of
    # the labels nx.draw already placed on the node markers.
    pos_attrs = {
        node: (coords[0], coords[1] + 0.08)
        for node, coords in pos_nodes.items()
    }

    # labels=None tells networkx to fall back to the node identifiers, which
    # keeps the output unchanged for callers that pass no node_names.
    nx.draw_networkx_labels(
        G,
        pos_attrs,
        labels=node_names or None,
        font_family='serif',
        font_size=20,
    )

    plt.axis('off')
    # Scale both axis limits by 1.2 (presumably to leave room for the
    # shifted labels near the border of the plot).
    axis = plt.gca()
    axis.set_xlim([1.2 * x for x in axis.get_xlim()])
    axis.set_ylim([1.2 * y for y in axis.get_ylim()])
    plt.show()

# Small undirected example graph (7 nodes, 6 edges) used by the
# heuristic link-prediction examples that follow.
edges = [
    [1, 3],
    [2, 3],
    [2, 4],
    [4, 5],
    [5, 6],
    [5, 7],
]
G = nx.Graph()
G.add_edges_from(edges)
draw_graph(G)

# Index-based link prediction: score the same candidate node pairs with the
# resource allocation index first and the Jaccard coefficient second.
# Each scorer yields (u, v, score) triples, printed as one list per method.
candidate_pairs = [(1, 2), (2, 7), (4, 6)]
for index_fn in (nx.resource_allocation_index, nx.jaccard_coefficient):
    preds = index_fn(G, candidate_pairs)
    print(list(preds))

# Tag every node with a "community" attribute:
# nodes 1-3 belong to community 0, nodes 4-7 to community 1.
community_members = {
    0: (1, 2, 3),
    1: (4, 5, 6, 7),
}
for community_id, members in community_members.items():
    for node in members:
        G.nodes[node]["community"] = community_id

# Community-based link prediction: score the candidate node pairs with the
# Soundarajan-Hopcroft common-neighbor count first and the
# Soundarajan-Hopcroft resource allocation index second.
# One list of (u, v, score) triples is printed per method.
candidate_pairs = [(1, 2), (2, 7), (4, 6)]
for community_fn in (nx.cn_soundarajan_hopcroft, nx.ra_index_soundarajan_hopcroft):
    preds = community_fn(G, candidate_pairs)
    print(list(preds))

# IPython shell escapes: download the Cora archive into the working directory
# and unpack it. The code below reads ./cora/cora.cites from the result.
!wget https://linqs-data.soe.ucsc.edu/public/lbc/cora.tgz
!tar -xvf cora.tgz

# Read the tab-separated citation pairs (the file has no header row, so the
# column names are supplied here) and build a graph from them.
cites_path = "./cora/cora.cites"
edgelist = pd.read_csv(
    cites_path,
    sep='\t',
    header=None,
    names=["target", "source"],
)
G = nx.from_pandas_edgelist(edgelist, source="source", target="target")
draw_graph(G)

# First split: sample edges from the full graph G for testing. The splitter
# returns the reduced graph together with the sampled node pairs and labels.
edgeSplitter = EdgeSplitter(G)
test_split = edgeSplitter.train_test_split(p=0.1, method="global")
graph_test, samples_test, labels_test = test_split

# Second split: repeat the procedure on the already reduced test graph to
# obtain the training graph and training samples; G is passed as the master
# graph of the splitter.
edgeSplitter = EdgeSplitter(graph_test, g_master=G)
train_split = edgeSplitter.train_test_split(p=0.1, method="global")
graph_train, samples_train, labels_train = train_split

# Fit node2vec on the training graph only, then wrap the learned vectors in a
# Hadamard edge embedder so that a node pair can be looked up directly.
node2vec = Node2Vec(graph_train)
model = node2vec.fit()
edges_embs = HadamardEmbedder(keyed_vectors=model.wv)

# The embedder is queried with the string form of each node id.
train_embeddings = [edges_embs[str(u), str(v)] for u, v in samples_train]
test_embeddings = [edges_embs[str(u), str(v)] for u, v in samples_test]

# Train a 1000-tree random forest on the training edge embeddings
# (fit returns the fitted estimator itself) and predict the test pairs.
rf = RandomForestClassifier(n_estimators=1000).fit(train_embeddings, labels_train)
y_pred = rf.predict(test_embeddings)

# Report precision, recall and F1 of the predictions against the test labels.
for label, score_fn in (
    ('Precision:', metrics.precision_score),
    ('Recall:', metrics.recall_score),
    ('F1-Score:', metrics.f1_score),
):
    print(label, score_fn(labels_test, y_pred))