#!/usr/bin/env python
# coding: utf-8

# # Meta functionalities of the EpiGraphDB platform

# In this notebook we show the following aspects of the EpiGraphDB platform, and how to use the API to get the information:
#
# 1. Metadata: meta nodes and meta edges, and the overall schema.
# 2. Search for a specific node under the meta node.
# 3. Cypher: how to query the database directly using Neo4j Cypher
#
# For detailed documentation on the API endpoints please visit:
#
# - The Swagger interface: http://api.epigraphdb.org
# - The sections regarding API endpoints on the documentation site: http://docs.epigraphdb.org/api/api-endpoints/

# In[1]:


from pprint import pformat

import networkx as nx
import pandas as pd
import requests


# In[2]:


API_URL = "https://api.epigraphdb.org"

# Seconds to wait for the API before giving up. `requests` applies no timeout
# unless one is passed explicitly, so a stalled connection would otherwise
# block the notebook indefinitely.
REQUEST_TIMEOUT = 30

# Health check: fail loudly on an HTTP error instead of trying to decode an
# error page as JSON.
r = requests.get(f"{API_URL}/ping", timeout=REQUEST_TIMEOUT)
r.raise_for_status()
# Bare expression: shown as the cell output in a notebook, no effect in a script.
r.json()


# ## Metadata

# Here we query for the metadata information using the endpoint `GET /meta/schema`, which will be used for downstream processing.

# In[3]:


endpoint = "/meta/schema"
params = {"graphviz": False, "plot": False}
r = requests.get(
    f"{API_URL}{endpoint}", params=params, timeout=REQUEST_TIMEOUT
)
r.raise_for_status()
metadata = r.json()

# Preview of metadata information: the top-level keys first, then the first
# 1000 characters of each section's pretty-printed content.
keys = metadata.keys()
print(pformat(keys), "\n")
for key in keys:
    print(f"# {key}:")
    print(pformat(metadata[key])[:1000], "\n")


# ### Meta nodes
#
# We can extract the specific meta node information as a pandas dataframe from the metadata.

# In[4]:


# One row per entry of metadata["nodes"], indexed by its key.
meta_node_df = pd.DataFrame.from_dict(metadata["nodes"], orient="index")

# Display-only view: sorted by index, with "count" rendered using thousands
# separators. `meta_node_df` itself is left unchanged.
(
    meta_node_df.sort_index().assign(
        count=lambda df: df["count"].apply(lambda x: f"{x:,}")
    )
)


# ### Meta relationships and connections
#
# We can also extract the meta relationship (edge) information, and the connections.

# In[5]:


# One row per entry of metadata["edges"], indexed by its key.
meta_edge_df = pd.DataFrame.from_dict(metadata["edges"], orient="index")

# Index the connection records by their "rel" field and keep only the two
# endpoint columns, so they can be joined onto the edge table by index.
connection_df = pd.DataFrame.from_dict(
    {conn["rel"]: conn for conn in metadata["connections"]}, orient="index"
)[["from_node", "to_node"]]

meta_rel_df = meta_edge_df.merge(
    connection_df,
    left_index=True,
    right_index=True,
)

# Display-only view: sorted by endpoints, with "count" rendered using
# thousands separators. `meta_rel_df` itself is left unchanged.
(
    meta_rel_df.sort_values(by=["from_node", "to_node"]).assign(
        count=lambda df: df["count"].apply(lambda x: f"{x:,}")
    )
)


# ### Schema plot

# We can generate a network diagram of the graph db schema using `networkx`.

# In[6]:


# Build a graph with one edge per row of `meta_rel_df`, connecting the
# "from_node" and "to_node" meta nodes.
graph = nx.from_pandas_edgelist(
    meta_rel_df, source="from_node", target="to_node"
)


# In[7]:


nx.draw(
    G=graph,
    pos=nx.kamada_kawai_layout(graph),
    with_labels=True,
    node_color="white",
)


# A detailed version of the schema plot can be obtained from the API:
#
# ![schema_plot](http://ieu-mrbssd1.epi.bris.ac.uk:28046/meta/schema?graphviz=true&plot=true)

# ## Search for specific node

# Users can use [the explorer on the Web UI](http://dev.epigraphdb.org/explore) to search for a specific node by:
#
# - fuzzy matching by "name" field.
# - exact matching by "ID" field if you know its ID (e.g. the ID to a GWAS from IEU GWAS Database).
#
# Here we show how these are done at the API level using `Gwas` nodes as an example.

# First we need to know what the "ID" and "name" fields are for the meta nodes using `GET /meta/nodes/id-name-schema`:

# In[8]:


r = requests.get(
    f"{API_URL}/meta/nodes/id-name-schema", timeout=REQUEST_TIMEOUT
)
r.raise_for_status()
meta_node_fields = r.json()
# Bare expression: shown as the cell output in a notebook, no effect in a script.
meta_node_fields


# ### Fuzzy matching

# Here we search for nodes that contain "body mass index" in their traits.

# In[9]:


name = "body mass index"
r = requests.get(
    f"{API_URL}/meta/nodes/Gwas/search",
    params={"name": name},
    timeout=REQUEST_TIMEOUT,
)
r.raise_for_status()
# Only the first 3000 characters are printed to keep the output short.
print(pformat(r.json())[:3000])


# ### Exact matching

# Similarly, we can exact match a specific node by its ID.

# In[10]:


# Named `gwas_id` rather than `id` so the builtin `id()` is not shadowed; the
# query parameter sent to the API is still called "id".
gwas_id = "ieu-a-2"
r = requests.get(
    f"{API_URL}/meta/nodes/Gwas/search",
    params={"id": gwas_id},
    timeout=REQUEST_TIMEOUT,
)
r.raise_for_status()
print(pformat(r.json())[:3000])


# ## Cypher (advanced)

# Advanced users that are familiar with Neo4j Cypher can query the database using Cypher directly.

# In[11]:


query = """
    MATCH (exposure:Gwas)-[mr:MR]->(outcome:Gwas)
    WHERE exposure.trait = "Body mass index"
    RETURN exposure, outcome, mr LIMIT 2
"""
r = requests.post(
    f"{API_URL}/cypher", json={"query": query}, timeout=REQUEST_TIMEOUT
)
r.raise_for_status()
print(pformat(r.json())[:3000])


# Alternatively we provide an endpoint `POST /cypher/builder/plain` that assists users in querying for simple cypher queries.

# In[12]:


# Same MR query between two `Gwas` nodes as above, expressed as a structured
# payload instead of a raw Cypher string.
payload = {
    "source_meta_node": "Gwas",
    "target_meta_node": "Gwas",
    "meta_rel": "MR",
    "where": ["source_node.trait = 'Body mass index'"],
    "limit": 2,
}
r = requests.post(
    f"{API_URL}/cypher/builder/plain", json=payload, timeout=REQUEST_TIMEOUT
)
r.raise_for_status()
print(pformat(r.json())[:3000])


# Again for the detailed documentation on the API endpoints please visit:
#
# - The Swagger interface: http://api.epigraphdb.org
# - The sections regarding API endpoints on the documentation site: http://docs.epigraphdb.org/api/api-endpoints/