#!/usr/bin/env python
# coding: utf-8

# # Supervised Learning with GCN
#
# Graph neural networks (GNNs) combine the strengths of graph analytics and machine learning.
# GraphScope provides the capability to process learning tasks. In this tutorial, we demonstrate
# how GraphScope trains a model with GCN.
#
# The learning task is node classification on a citation network. In this task, the algorithm has
# to determine the label of the nodes in the [Cora](https://linqs.soe.ucsc.edu/data) dataset.
# The dataset consists of academic publications as the nodes and the citations between them as the
# links: if publication A cites publication B, then the graph has an edge from A to B. The nodes
# are classified into one of seven subjects, and our model will learn to predict this subject.
#
# In this task, we use a Graph Convolutional Network (GCN) to train the model. The core of the GCN
# model is a "graph convolution" layer. This layer is similar to a conventional dense layer,
# augmented by the graph adjacency matrix to use information about a node's connections.
#
# This tutorial has the following steps:
#
# - Launching the learning engine and attaching it to the loaded graph.
# - Defining the training process with the builtin GCN model and configuring hyperparameters.
# - Training and evaluating.

# In[ ]:


# Install the graphscope package if you are NOT in the Playground.
get_ipython().system('pip3 install graphscope')
get_ipython().system('pip3 uninstall -y importlib_metadata # Works around a module conflict issue on colab.google. Remove this line if you are not on colab.')


# In[ ]:


# Import the graphscope module.
import graphscope

graphscope.set_option(show_log=False)  # disable verbose log output


# In[ ]:


# Load the cora dataset.
from graphscope.dataset import load_cora

graph = load_cora()


# Then, we need to define a feature list for training. The training features should be selected
# from the vertex properties. In this case, we choose all the properties prefixed with "feat_"
# as the training features.
#
# With the feature list, we next launch a learning engine with the
# [graphlearn](https://graphscope.io/docs/reference/session.html#graphscope.Session.graphlearn)
# method of graphscope.
#
# In this case, we specify GCN training over the "paper" nodes and "cites" edges.
#
# With "gen_labels", we split the "paper" nodes into three parts: 75% are used as the training
# set, 10% for validation, and 15% for testing. Each rule divides the nodes into 100 groups and
# assigns the groups in the given range to the corresponding split.

# In[ ]:


# Define the features for learning.
paper_features = []
for i in range(1433):
    paper_features.append("feat_" + str(i))

# Launch a learning engine.
lg = graphscope.graphlearn(
    graph,
    nodes=[("paper", paper_features)],
    edges=[("paper", "cites", "paper")],
    gen_labels=[
        ("train", "paper", 100, (0, 75)),
        ("val", "paper", 100, (75, 85)),
        ("test", "paper", 100, (85, 100)),
    ],
)


# We use the builtin GCN model to define the training process. You can find more detail about all
# the builtin learning models in
# [Graph Learning Model](https://graphscope.io/docs/learning_engine.html#data-model).
#
# In this example, we use TensorFlow as the "NN" backend trainer.
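# Before wiring up the trainer, here is a minimal, illustrative NumPy sketch of what a single
# graph-convolution layer computes: the self-loop-augmented, symmetrically normalized adjacency
# matrix mixes each node's features with its neighbors' before a dense transform and activation.
# The function and toy data below (`gcn_layer`, `toy_adj`) are hypothetical names for this sketch
# only; the builtin GCN model used next implements the real layers.

# In[ ]:


import numpy as np


def gcn_layer(adj, features, weights):
    """One graph-convolution step: relu(D^-1/2 (A + I) D^-1/2 @ H @ W)."""
    a_hat = adj + np.eye(adj.shape[0])  # add self-loops
    d_inv_sqrt = np.diag(1.0 / np.sqrt(a_hat.sum(axis=1)))
    a_norm = d_inv_sqrt @ a_hat @ d_inv_sqrt  # symmetric normalization
    return np.maximum(a_norm @ features @ weights, 0.0)  # aggregate, transform, ReLU


# Toy example: 3 nodes, 4 input features, 2 output channels.
toy_adj = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=float)
rng = np.random.default_rng(0)
out = gcn_layer(toy_adj, rng.normal(size=(3, 4)), rng.normal(size=(4, 2)))
print(out.shape)  # (3, 2)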
# In[ ]:


from graphscope.learning.examples import GCN
from graphscope.learning.graphlearn.python.model.tf.optimizer import get_tf_optimizer
from graphscope.learning.graphlearn.python.model.tf.trainer import LocalTFTrainer


# Supervised GCN.
def train(config, graph):
    def model_fn():
        return GCN(
            graph,
            config["class_num"],
            config["features_num"],
            config["batch_size"],
            val_batch_size=config["val_batch_size"],
            test_batch_size=config["test_batch_size"],
            categorical_attrs_desc=config["categorical_attrs_desc"],
            hidden_dim=config["hidden_dim"],
            in_drop_rate=config["in_drop_rate"],
            neighs_num=config["neighs_num"],
            hops_num=config["hops_num"],
            node_type=config["node_type"],
            edge_type=config["edge_type"],
            full_graph_mode=config["full_graph_mode"],
        )

    trainer = LocalTFTrainer(
        model_fn,
        epoch=config["epoch"],
        optimizer=get_tf_optimizer(
            config["learning_algo"], config["learning_rate"], config["weight_decay"]
        ),
    )
    trainer.train_and_evaluate()


# Define the hyperparameters.
config = {
    "class_num": 7,  # output dimension
    "features_num": 1433,
    "batch_size": 140,
    "val_batch_size": 300,
    "test_batch_size": 1000,
    "categorical_attrs_desc": "",
    "hidden_dim": 128,
    "in_drop_rate": 0.5,
    "hops_num": 2,
    "neighs_num": [5, 5],
    "full_graph_mode": False,
    "agg_type": "gcn",  # alternatives: "mean", "sum"
    "learning_algo": "adam",
    "learning_rate": 0.01,
    "weight_decay": 0.0005,
    "epoch": 5,
    "node_type": "paper",
    "edge_type": "cites",
}


# With the training process and hyperparameters defined, we can now start training with the
# learning engine `lg` and the hyperparameter configuration.

# In[ ]:


train(config, lg)
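# (Optional) A quick follow-up sketch: you can rerun training with a modified copy of the
# hyperparameters, e.g. more epochs and a smaller learning rate. The values below are
# illustrative, not tuned recommendations; the cell simply reuses the `train` function and the
# learning engine `lg` defined above.

# In[ ]:


import copy

tuned_config = copy.deepcopy(config)
tuned_config["epoch"] = 20
tuned_config["learning_rate"] = 0.005

train(tuned_config, lg)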