For now, a token can be obtained from the Nexus web application. We are looking into simpler alternatives.
Once you have obtained a token, paste it below.
# Prompt for the Nexus access token (getpass does not echo the input).
import getpass
TOKEN = getpass.getpass()
from kgforge.core import KnowledgeGraphForge
# Target the "sscx" dissemination project of the "public" organization in Nexus.
ORG = "public"
PROJECT = "sscx"
# Open a forge session on the "<organization>/<project>" bucket with that token.
forge = KnowledgeGraphForge(
    "prod-forge-nexus.yml",
    bucket="/".join((ORG, PROJECT)),
    token=TOKEN,
)
# Ask the forge for the types it exposes (shown as the cell output in a notebook).
forge.types()
# Resolve a free-text term against the ontology terms.
from kgforge.core.commons.strategies import ResolvingStrategy
text = "somatosensory"
limit = 10
# ALL_MATCHES is used here; the other search strategies are
# ResolvingStrategy.BEST_MATCH and ResolvingStrategy.EXACT_MATCH.
brain_region = forge.resolve(
    text,
    scope="ontology",
    target="terms",
    strategy=ResolvingStrategy.ALL_MATCHES,
    limit=limit,
)
# Tabulate the matches and show at most the first 100 rows.
forge.as_dataframe(brain_region).head(100)
# Search reconstructed cells in the sscx project. The filters applied are:
#   - the dataset type,
#   - the classification annotation (its type and its label, here an m-type),
#   - the brain region label and the layer label,
#   - the encoding format of the distribution.
_type = "ReconstructedCell"
classification_type = "nsg:MType"
mType = "L4_NBC"
brainRegion = "primary somatosensory cortex"
layer = "layer 4"
encodingFormat = "application/swc"
limit = 2
forge.template("Dataset")
path = forge.paths("Dataset")  # to have autocompletion on the properties
data = forge.search(
    path.type.id == _type,
    # Known issue: use path.annotation.hasBody.type.id in case of error:
    # AttributeError: 'PathWrapper' object has no attribute '_path'
    path.annotation.hasBody.type.id == classification_type,
    path.annotation.hasBody.label == mType,
    path.brainLocation.brainRegion.label == brainRegion,
    path.brainLocation.layer.label == layer,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
print(f"{len(data)} dataset of type '{_type}' found.")
DISPLAY_LIMIT = 10
# Keep only the properties of interest; each property path is listed once
# (the layer id/label entries used to be repeated in this list).
reshaped_data = forge.reshape(
    data,
    keep=[
        "id",
        "name",
        "subject",
        "brainLocation.brainRegion.id",
        "brainLocation.brainRegion.label",
        "brainLocation.layer.id",
        "brainLocation.layer.label",
        "contribution",
        "distribution.name",
        "distribution.contentUrl",
        "distribution.encodingFormat",
    ],
)
# Display at most DISPLAY_LIMIT reshaped results as a table.
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])
# Download the files referenced by distribution.contentUrl into dirpath.
dirpath = "./downloaded/"
forge.download(data, "distribution.contentUrl", dirpath)
It is possible to get the locations of the files and the storage they reside on (e.g. Blue Brain Nexus store, GPFS, ...).
# Convert the atLocation of the first distribution of the first search result
# to JSON (in a notebook the value of this expression is the cell output).
# Indexing with [0] assumes the search returned at least one result.
forge.as_json(data[0].distribution[0].atLocation)
# Read the location attribute of that same atLocation.
data[0].distribution[0].atLocation.location
# Search traces in the sscx project. The filters applied are:
#   - the dataset type,
#   - the classification annotation (its type and its label, here an e-type),
#   - the brain region label and the layer label,
#   - the encoding format of the distribution.
_type = "Trace"
classification_type = "nsg:EType"
eType = "cADpyr"
brainRegion = "primary somatosensory cortex"
layer = "layer 5"
encodingFormat = "application/nwb"
limit = 10
path = forge.paths("Dataset")  # to have autocompletion on the properties
data = forge.search(
    path.type.id == _type,
    path.annotation.hasBody.type.id == classification_type,
    path.annotation.hasBody.label == eType,
    path.brainLocation.brainRegion.label == brainRegion,
    path.brainLocation.layer.label == layer,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
print(f"{len(data)} data of type '{_type}' found.")
DISPLAY_LIMIT = 10
# Keep only the properties of interest; each property path is listed once
# (the layer id/label entries used to be repeated in this list).
reshaped_data = forge.reshape(
    data,
    keep=[
        "id",
        "name",
        "subject",
        "brainLocation.brainRegion.id",
        "brainLocation.brainRegion.label",
        "brainLocation.layer.id",
        "brainLocation.layer.label",
        "contribution",
        "distribution.name",
        "distribution.contentUrl",
        "distribution.encodingFormat",
    ],
)
# Display at most DISPLAY_LIMIT reshaped results as a table.
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])
# Download the files referenced by distribution.contentUrl into dirpath.
dirpath = "./downloaded/"
forge.download(data, "distribution.contentUrl", dirpath)
# Search layer thickness data in the sscx project. The filters applied are:
#   - the dataset type,
#   - the layer label and the brain region label,
#   - the encoding format of the distribution.
_type = "LayerThickness"
brainRegion = "primary somatosensory cortex"
layer = "layer 2"
encodingFormat = "application/xlsx"
limit = 10
path = forge.paths("Dataset")  # to have autocompletion on the properties
data = forge.search(
    path.type.id == _type,
    path.brainLocation.layer.label == layer,
    path.brainLocation.brainRegion.label == brainRegion,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
print(f"{len(data)} data of type '{_type}' found.")
DISPLAY_LIMIT = 10
# Keep only the properties of interest; each property path is listed once
# (the layer id/label entries used to be repeated in this list).
reshaped_data = forge.reshape(
    data,
    keep=[
        "id",
        "name",
        "subject",
        "brainLocation.brainRegion.id",
        "brainLocation.brainRegion.label",
        "brainLocation.layer.id",
        "brainLocation.layer.label",
        "contribution",
        "distribution.name",
        "distribution.contentUrl",
        "distribution.encodingFormat",
    ],
)
# Display at most DISPLAY_LIMIT reshaped results as a table.
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])
# Download the files referenced by distribution.contentUrl into dirpath.
dirpath = "./downloaded/"
forge.download(data, "distribution.contentUrl", dirpath)
# Search neuron density data in the sscx project. The filters applied are:
#   - the dataset type,
#   - the layer label and the brain region label,
#   - the encoding format of the distribution.
_type = "NeuronDensity"
brainRegion = "primary somatosensory cortex"
layer = "layer 2"
encodingFormat = "application/xlsx"
limit = 10
path = forge.paths("Dataset")  # to have autocompletion on the properties
data = forge.search(
    path.type.id == _type,
    path.brainLocation.layer.label == layer,
    path.brainLocation.brainRegion.label == brainRegion,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
# Report how many results the search returned.
print(f"{len(data)} data of type '{_type}' found.")
1 data of type 'NeuronDensity' found.
DISPLAY_LIMIT = 10
# Keep only the properties of interest; each property path is listed once
# (the layer id/label entries used to be repeated in this list).
reshaped_data = forge.reshape(
    data,
    keep=[
        "id",
        "name",
        "subject",
        "brainLocation.brainRegion.id",
        "brainLocation.brainRegion.label",
        "brainLocation.layer.id",
        "brainLocation.layer.label",
        "contribution",
        "distribution.name",
        "distribution.contentUrl",
        "distribution.encodingFormat",
    ],
)
# Display at most DISPLAY_LIMIT reshaped results as a table.
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])
# Download the files referenced by distribution.contentUrl into dirpath.
dirpath = "./downloaded/"
forge.download(data, "distribution.contentUrl", dirpath)
# Open a second forge session, this time on the bbp/atlas project in Nexus,
# with the same configuration file and token.
forge_atlas = KnowledgeGraphForge(
    "prod-forge-nexus.yml",
    bucket="bbp/atlas",
    token=TOKEN,
)
Atlas-related types: AtlasRelease, BrainParcellationDataLayer, CellDensityDataLayer, GeneExpressionVolumetricDataLayer, GliaCellDensity, NISSLImageDataLayer.
# Search brain parcellation data layers in the atlas project, filtering on
# the dataset type only.
_type = "BrainParcellationDataLayer"
limit = 10
# Build the property paths from the atlas forge itself, so that the search
# does not depend on a `path` variable left over from another forge session
# (and does not fail with a NameError when run on its own).
path = forge_atlas.paths("Dataset")  # to have autocompletion on the properties
data = forge_atlas.search(path.type.id == _type, limit=limit)
print(f"{len(data)} data of type '{_type}' found.")
DISPLAY_LIMIT = 10
# Keep only the properties of interest for display.
reshaped_data = forge_atlas.reshape(
    data,
    keep=[
        "id",
        "name",
        "brainLocation.brainRegion.id",
        "brainLocation.brainRegion.label",
        "contribution",
        "distribution.name",
        "distribution.contentUrl",
        "distribution.encodingFormat",
    ],
)
# Display at most DISPLAY_LIMIT reshaped results as a table.
forge_atlas.as_dataframe(reshaped_data[:DISPLAY_LIMIT])
# Download the files referenced by distribution.contentUrl into dirpath.
dirpath = "./downloaded/"
forge_atlas.download(data, "distribution.contentUrl", dirpath)
Tagged data are data with immutable identifiers. Such an identifier guarantees that the state of the data at the time the tag was created can be retrieved. A tag here is similar to a git tag.
# Open a forge session on the bbp/lnmce project and name the tag of interest.
bucket = "bbp/lnmce"
forge_tag = KnowledgeGraphForge(
    "prod-forge-nexus.yml",
    bucket=bucket,
    token=TOKEN,
)
tag = "LNMCE2020"
# Search for electrophysiology traces. The filters applied are:
#   - the dataset type,
#   - the classification annotation (its type and its label, here an e-type),
#   - the brain region label,
#   - the encoding format of the distribution.
_type = "Trace"
classification_type = "EType"
eType = "bIR"
brainRegion = "primary somatosensory cortex"
encodingFormat = "application/nwb"
limit = 10
path = forge_tag.paths("Dataset")  # to have autocompletion on the properties
data = forge_tag.search(
    path.type.id == _type,
    # Known issue: use path.annotation.hasBody.type.id in case of error:
    # AttributeError: 'PathWrapper' object has no attribute '_path'
    path.annotation.hasBody.type.id == classification_type,
    path.annotation.hasBody.label == eType,
    path.brainLocation.brainRegion.label == brainRegion,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
# Report how many results the search returned.
print(f"{len(data)} data of type '{_type}' found.")
10 data of type 'Trace' found.
# Retrieve every search result at the revision identified by the tag.
# NOTE(review): retrieve is expected to yield None for a resource that cannot
# be fetched at this tag (confirm against the kgforge documentation); such
# entries are dropped so that the count below and the download only cover
# resources that were actually retrieved.
results = [
    resource
    for resource in (forge_tag.retrieve(d.id, version=tag) for d in data)
    if resource is not None
]
print(f"{len(results)} data of type '{_type}' at tag {tag} found.")
DISPLAY_LIMIT = 10
# Keep only the properties of interest; each property path is listed once
# (the layer id/label entries used to be repeated in this list).
reshaped_data = forge_tag.reshape(
    results,
    keep=[
        "id",
        "name",
        "subject",
        "brainLocation.brainRegion.id",
        "brainLocation.brainRegion.label",
        "brainLocation.layer.id",
        "brainLocation.layer.label",
        "contribution",
        "distribution.name",
        "distribution.contentUrl",
        "distribution.encodingFormat",
    ],
)
# Display at most DISPLAY_LIMIT reshaped results as a table.
forge_tag.as_dataframe(reshaped_data[:DISPLAY_LIMIT])
# Download the files referenced by distribution.contentUrl into dirpath.
dirpath = "./downloaded/"
forge_tag.download(results, "distribution.contentUrl", dirpath)
A view exposes a subset of data for query and access in specialised indices (SPARQL, ElasticSearch).
# Here is an example of view url
view_url = "https://bluebrain.github.io/nexus/vocabulary/lnmce2020SparqlIndex"
# Point the SPARQL search endpoint of the forge at that view. view_url is
# reused here rather than repeating the same URL literal.
searchendpoints = {"sparql": {"endpoint": view_url}}
forge_view = KnowledgeGraphForge(
    "prod-forge-nexus.yml",
    bucket="bbp/lnmce",
    token=TOKEN,
    searchendpoints=searchendpoints,
)
# Search for electrophysiology traces. The filters applied are:
#   - the dataset type,
#   - the classification annotation (its type and its label, here an e-type),
#   - the brain region label,
#   - the encoding format of the distribution.
_type = "Trace"
# NOTE(review): the classification type is written ":EType" here, whereas
# other searches in this file use "EType" or "nsg:EType" -- confirm that the
# leading colon is intended.
classification_type = ":EType"
eType = "bIR"
brainRegion = "primary somatosensory cortex"
encodingFormat = "application/nwb"
limit = 10
path = forge_view.paths("Dataset")  # to have autocompletion on the properties
data = forge_view.search(
    path.type.id == _type,
    # Known issue: use path.annotation.hasBody.type.id in case of error:
    # AttributeError: 'PathWrapper' object has no attribute '_path'
    path.annotation.hasBody.type.id == classification_type,
    path.annotation.hasBody.label == eType,
    path.brainLocation.brainRegion.label == brainRegion,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
print(f"{len(data)} data of type '{_type}' found.")
10 data of type 'Trace' found.
DISPLAY_LIMIT = 10
# Reshape with forge_view, the same forge session used for the dataframe and
# the download below (this call previously went through forge_tag).
# Each property path is listed once (the layer id/label entries used to be
# repeated in this list).
reshaped_data = forge_view.reshape(
    data,
    keep=[
        "id",
        "name",
        "subject",
        "brainLocation.brainRegion.id",
        "brainLocation.brainRegion.label",
        "brainLocation.layer.id",
        "brainLocation.layer.label",
        "contribution",
        "distribution.name",
        "distribution.contentUrl",
        "distribution.encodingFormat",
    ],
)
# Display at most DISPLAY_LIMIT reshaped results as a table.
forge_view.as_dataframe(reshaped_data[:DISPLAY_LIMIT])
# Download the files referenced by distribution.contentUrl into dirpath.
dirpath = "./downloaded/"
forge_view.download(data, "distribution.contentUrl", dirpath)