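The ISA API comes packaged with a GraphQL interface and a JSON-LD serializer to help users query investigations. The aim of this notebook is to show how to query an investigation with GraphQL, how to serialize it to JSON-LD and load it into an RDF graph, and how to query that graph with SPARQL.
To illustrate this, we will get the names of all the protocol types stored in an ISA investigation, first with GraphQL and then with SPARQL, and check that both approaches return the same result.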
# Let's first import all the packages we need
from os import path
import json
from rdflib import Graph, Namespace
from isatools.isajson import load
from isatools.model import set_context
# Location of the BII-S-3 example investigation (ISA-JSON), relative to the working directory
filepath = path.join('json', 'BII-S-3', 'BII-S-3.json')
# JSON is UTF-8 by specification: set the encoding explicitly rather than
# relying on the platform default, which is not UTF-8 on every system.
with open(filepath, 'r', encoding='utf-8') as f:
    investigation = load(f)
# GraphQL query: for every study, fetch the annotation value of each protocol's type
# (the protocolType field is aliased to "type" in the response).
query = """
{
studies {
protocols {
type: protocolType { annotationValue }
}
}
}
"""
results = investigation.execute_query(query)
# dict.fromkeys() drops duplicates while keeping the first-seen order.
protocols_graphql = list(dict.fromkeys(
    protocol['type']['annotationValue']
    for study in results.data['studies']
    for protocol in study['protocols']
))
print(protocols_graphql)
['sample collection', 'nucleic acid extraction', 'reverse transcription', 'library construction', 'nucleic acid sequencing', 'data transformation']
# Configure the JSON-LD serializer: 'wd' vocabulary, local context files,
# one context file per object type (all_in_one=False).
set_context(
    vocab='wd',
    local=True,
    prepend_url='https://example.com',
    all_in_one=False,
)
The set_context() method takes five parameters:
- vocab: the vocabulary to use, one of sdo, obo, wdt, wd and sio
- local: if True, uses local files else the GitHub contexts
- prepend_url: the URL prepended to the identifiers generated for the serialized objects
- all_in_one: if True, all the contexts are pulled from a single file instead of separate context files
- include_contexts: if True, the context is included in the JSON-LD serialization, else it only contains the URL or local path to the context file.
ld = investigation.to_dict(ld=True)
The investigation can be serialized to JSON with the to_dict() method. By passing the optional parameter ld=True, the serializer binds the @type, @context and @id to each object in the JSON.
Before we can generate a graph, we need to create the proper namespaces and transform the ld variable into a string.
# Namespaces that get bound to the graph below
WD = Namespace("http://www.wikidata.org/entity/")
ISA = Namespace('https://isa.org/')

# The graph is parsed from text, so dump the JSON-LD dictionary to a string first
ld_string = json.dumps(ld)
graph = Graph()
graph.parse(data=ld_string, format='json-ld')

# Finally, bind the namespaces to the graph.
# NOTE(review): the prefix 'wdt' is bound to the Wikidata *entity* namespace here,
# while the SPARQL query in this notebook declares that same namespace as 'wd'.
# Consider renaming the prefix to 'wd' if nothing else relies on 'wdt'.
for prefix, namespace in (('wdt', WD), ('isa', ISA)):
    graph.bind(prefix, namespace)
# SPARQL query: select the distinct, non-empty names of the protocol types
# attached to every protocol found in the graph.
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX wd: <http://www.wikidata.org/entity/>
SELECT distinct ?protocolTypeName
WHERE {
?p rdf:type wd:Q41689629 . # Is a protocol
?p wd:P7793 ?protocolType .
?protocolType wd:P527 ?protocolTypeName . # Get each protocol type name
FILTER (?protocolTypeName!=""^^wd:Q1417099) # Filter out empty protocol type name
}
"""

# Collect every bound value of every result row as a plain string,
# skipping values that were already seen.
protocols_sparql = []
for row in graph.query(query):
    for term in row.asdict().values():
        name = str(term.toPython())
        if name not in protocols_sparql:
            protocols_sparql.append(name)
print(protocols_sparql)

# The query has no ORDER BY, so the order of the rows is not guaranteed:
# compare the two (already de-duplicated) results regardless of order.
assert set(protocols_sparql) == set(protocols_graphql), (
    f"SPARQL and GraphQL results differ: {protocols_sparql} != {protocols_graphql}"
)
['sample collection', 'nucleic acid extraction', 'reverse transcription', 'library construction', 'nucleic acid sequencing', 'data transformation']