#!/usr/bin/env python
# coding: utf-8

# # Supervised Learning with GCN
#
# Graph neural networks (GNNs) combine the strengths of graph analytics and machine learning.
# GraphScope provides the capability to process learning tasks. In this tutorial, we demonstrate
# how GraphScope trains a model with GCN.
#
# The learning task is node classification on a citation network. In this task, the algorithm has
# to determine the label of the nodes in the [Cora](https://linqs.soe.ucsc.edu/data) dataset.
# The dataset consists of academic publications as the nodes and the citations between them as the
# links: if publication A cites publication B, then the graph has an edge from A to B. The nodes
# are classified into one of seven subjects, and our model will learn to predict this subject.
#
# In this task, we use a Graph Convolutional Network (GCN) to train the model. The core of the GCN
# model is a "graph convolution" layer. This layer is similar to a conventional dense layer,
# augmented by the graph adjacency matrix to use information about a node's connections.
#
# This tutorial has the following steps:
#
# - Launching the learning engine and attaching it to the loaded graph.
# - Defining the training process with the builtin GCN model and configuring hyperparameters.
# - Training and evaluating.

# In[ ]:


# Install the graphscope package if you are NOT in the Playground.
get_ipython().system('pip3 install graphscope')
get_ipython().system('pip3 uninstall -y importlib_metadata # Works around a module conflict issue on colab.google. Remove this line if you are not on colab.')


# In[ ]:


# Import the graphscope module.
import graphscope

graphscope.set_option(show_log=False)  # disable verbose log output


# In[ ]:


# Load the cora dataset.
from graphscope.dataset import load_cora

graph = load_cora()


# Then, we need to define a feature list for training. The training features should be selected
# from the vertex properties. In this case, we choose all the properties prefixed with "feat_"
# as the training features.
#
# With the feature list, we next launch a learning engine with the
# [graphlearn](https://graphscope.io/docs/reference/session.html#graphscope.Session.graphlearn)
# method of graphscope.
#
# In this case, we specify GCN training over the "paper" nodes and "cites" edges.
#
# With "gen_labels", we split the "paper" nodes into three parts: 75% are used as the training
# set, 10% for validation, and 15% for testing. Each rule divides the nodes into 100 groups and
# assigns the groups in the given range to the corresponding split.

# In[ ]:


# Define the features for learning.
paper_features = []
for i in range(1433):
    paper_features.append("feat_" + str(i))

# Launch a learning engine.
lg = graphscope.graphlearn(
    graph,
    nodes=[("paper", paper_features)],
    edges=[("paper", "cites", "paper")],
    gen_labels=[
        ("train", "paper", 100, (0, 75)),
        ("val", "paper", 100, (75, 85)),
        ("test", "paper", 100, (85, 100)),
    ],
)


# We use the builtin GCN model to define the training process. You can find more detail about all
# the builtin learning models in
# [Graph Learning Model](https://graphscope.io/docs/learning_engine.html#data-model).
#
# In this example, we use TensorFlow as the "NN" backend trainer.
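# Before wiring up the trainer, here is a minimal, illustrative NumPy sketch of what a single
# graph-convolution layer computes: the self-loop-augmented, symmetrically normalized adjacency
# matrix mixes each node's features with its neighbors' before a dense transform and activation.
# The function and toy data below (`gcn_layer`, `toy_adj`) are hypothetical names for this sketch
# only; the builtin GCN model used next implements the real layers.

# In[ ]:


import numpy as np


def gcn_layer(adj, features, weights):
    """One graph-convolution step: relu(D^-1/2 (A + I) D^-1/2 @ H @ W)."""
    a_hat = adj + np.eye(adj.shape[0])  # add self-loops
    d_inv_sqrt = np.diag(1.0 / np.sqrt(a_hat.sum(axis=1)))
    a_norm = d_inv_sqrt @ a_hat @ d_inv_sqrt  # symmetric normalization
    return np.maximum(a_norm @ features @ weights, 0.0)  # aggregate, transform, ReLU


# Toy example: 3 nodes, 4 input features, 2 output channels.
toy_adj = np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]], dtype=float)
rng = np.random.default_rng(0)
out = gcn_layer(toy_adj, rng.normal(size=(3, 4)), rng.normal(size=(4, 2)))
print(out.shape)  # (3, 2)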
# In[ ]:


from graphscope.learning.examples import GCN
from graphscope.learning.graphlearn.python.model.tf.optimizer import get_tf_optimizer
from graphscope.learning.graphlearn.python.model.tf.trainer import LocalTFTrainer


# Supervised GCN.
def train(config, graph):
    def model_fn():
        return GCN(
            graph,
            config["class_num"],
            config["features_num"],
            config["batch_size"],
            val_batch_size=config["val_batch_size"],
            test_batch_size=config["test_batch_size"],
            categorical_attrs_desc=config["categorical_attrs_desc"],
            hidden_dim=config["hidden_dim"],
            in_drop_rate=config["in_drop_rate"],
            neighs_num=config["neighs_num"],
            hops_num=config["hops_num"],
            node_type=config["node_type"],
            edge_type=config["edge_type"],
            full_graph_mode=config["full_graph_mode"],
        )

    trainer = LocalTFTrainer(
        model_fn,
        epoch=config["epoch"],
        optimizer=get_tf_optimizer(
            config["learning_algo"], config["learning_rate"], config["weight_decay"]
        ),
    )
    trainer.train_and_evaluate()


# Define the hyperparameters.
config = {
    "class_num": 7,  # output dimension
    "features_num": 1433,
    "batch_size": 140,
    "val_batch_size": 300,
    "test_batch_size": 1000,
    "categorical_attrs_desc": "",
    "hidden_dim": 128,
    "in_drop_rate": 0.5,
    "hops_num": 2,
    "neighs_num": [5, 5],
    "full_graph_mode": False,
    "agg_type": "gcn",  # alternatives: "mean", "sum"
    "learning_algo": "adam",
    "learning_rate": 0.01,
    "weight_decay": 0.0005,
    "epoch": 5,
    "node_type": "paper",
    "edge_type": "cites",
}


# With the training process and hyperparameters defined, we can now start training with the
# learning engine `lg` and the hyperparameter configuration.

# In[ ]:


train(config, lg)
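# (Optional) A quick follow-up sketch: you can rerun training with a modified copy of the
# hyperparameters, e.g. more epochs and a smaller learning rate. The values below are
# illustrative, not tuned recommendations; the cell simply reuses the `train` function and the
# learning engine `lg` defined above.

# In[ ]:


import copy

tuned_config = copy.deepcopy(config)
tuned_config["epoch"] = 20
tuned_config["learning_rate"] = 0.005

train(tuned_config, lg)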