#!/usr/bin/env python
# coding: utf-8

# # Searching and Downloading Data from the Blue Brain Knowledge Graph using the Knowledge Graph Forge

# ## Initialize and configure

# ### Get an authentication token

# For now, the [Nexus web application](https://bbp.epfl.ch/nexus/web) can be used to get a token. We are looking for other simpler alternatives.
#
# - Step 1: From the opened web page, click on the login button on the right corner and follow the instructions.
#
# ![login-ui](./login-ui.png)
#
# - Step 2: At the end you’ll see a token button on the right corner. Click on it to copy the token.
#
# ![copy-token](./copy-token.png)
#
# Once a token is obtained then proceed to paste it below.

# In[ ]:

import getpass

from kgforge.core import KnowledgeGraphForge
from kgforge.core.commons.strategies import ResolvingStrategy

# In[ ]:

# Prompt for the token without echoing it, so it never ends up in the script.
TOKEN = getpass.getpass()

# ### Configure a client (forge) to access the knowledge graph

# In[ ]:

# Let's target the sscx dissemination project in Nexus
ORG = "public"
PROJECT = "sscx"

# In[ ]:

forge = KnowledgeGraphForge(
    "prod-forge-nexus.yml",
    bucket=f"{ORG}/{PROJECT}",
    token=TOKEN,
)

# In[ ]:

# Settings shared by every "display" and "download" step below. They are
# defined once here instead of being re-assigned to the same values in each
# section.
DISPLAY_LIMIT = 10
dirpath = "./downloaded/"

# Properties kept when reshaping sscx results for display. Each property is
# listed once; the layer id/label pair used to be repeated in every copy of
# this list.
DATASET_KEEP = [
    "id",
    "name",
    "subject",
    "brainLocation.brainRegion.id",
    "brainLocation.brainRegion.label",
    "brainLocation.layer.id",
    "brainLocation.layer.label",
    "contribution",
    "distribution.name",
    "distribution.contentUrl",
    "distribution.encodingFormat",
]

# ## Search and Download

# In[ ]:

forge.types()

# ### For ontologies

# #### Set filters

# In[ ]:

# Supported filters for the time being are:
text = "somatosensory"
limit = 10

# In[ ]:

# Other search strategies can be ResolvingStrategy.BEST_MATCH, ResolvingStrategy.EXACT_MATCH
brain_region = forge.resolve(
    text,
    scope="ontology",
    target="terms",
    strategy=ResolvingStrategy.ALL_MATCHES,
    limit=limit,
)

# In[ ]:

forge.as_dataframe(brain_region).head(100)

# ### For Morphologies

# #### Set filters

# In[ ]:

# Supported filters for the time being are:
_type = "ReconstructedCell"
classification_type = "nsg:MType"
mType = "L4_NBC"
brainRegion = "primary somatosensory cortex"
layer = "layer 4"
encodingFormat = "application/swc"
limit = 2

# In[ ]:

forge.template("Dataset")

# #### Run Query

# In[ ]:

path = forge.paths("Dataset")  # to have autocompletion on the properties
data = forge.search(
    path.type.id == _type,
    path.annotation.hasBody.type == classification_type,
    path.annotation.hasBody.label == mType,
    path.brainLocation.brainRegion.label == brainRegion,
    path.brainLocation.layer.label == layer,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
print(f"{len(data)} dataset of type '{_type}' found.")

# #### Display the results

# In[ ]:

reshaped_data = forge.reshape(data, keep=DATASET_KEEP)
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])

# #### Download

# In[ ]:

forge.download(data, "distribution.contentUrl", dirpath)

# ### For Trace

# #### Set filters

# In[ ]:

# Supported filters for the time being are:
_type = "Trace"
classification_type = "nsg:EType"
eType = "cADpyr"
brainRegion = "primary somatosensory cortex"
layer = "layer 5"
encodingFormat = "application/nwb"
limit = 10

# #### Run Query

# In[ ]:

path = forge.paths("Dataset")  # to have autocompletion on the properties
data = forge.search(
    path.type.id == _type,
    path.annotation.hasBody.type == classification_type,
    path.annotation.hasBody.label == eType,
    path.brainLocation.brainRegion.label == brainRegion,
    path.brainLocation.layer.label == layer,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
print(f"{len(data)} data of type '{_type}' found.")

# #### Display the results

# In[ ]:

reshaped_data = forge.reshape(data, keep=DATASET_KEEP)
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])

# #### Download

# In[ ]:

forge.download(data, "distribution.contentUrl", dirpath)

# ### For LayerThickness

# #### Set filters

# In[ ]:

# Supported filters for the time being are:
_type = "LayerThickness"
brainRegion = "primary somatosensory cortex"
layer = "layer 2"
encodingFormat = "application/xlsx"
limit = 10

# #### Run query

# In[ ]:

path = forge.paths("Dataset")  # to have autocompletion on the properties
data = forge.search(
    path.type.id == _type,
    path.brainLocation.layer.label == layer,
    path.brainLocation.brainRegion.label == brainRegion,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
print(f"{len(data)} data of type '{_type}' found.")

# #### Display Results

# In[ ]:

reshaped_data = forge.reshape(data, keep=DATASET_KEEP)
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])

# #### Download

# In[ ]:

forge.download(data, "distribution.contentUrl", dirpath)

# ### For Neuron Density

# #### Set filters

# In[ ]:

# Supported filters for the time being are:
_type = "NeuronDensity"
brainRegion = "primary somatosensory cortex"
layer = "layer 2"
encodingFormat = "application/xlsx"
limit = 10

# #### Run query

# In[ ]:

path = forge.paths("Dataset")  # to have autocompletion on the properties
data = forge.search(
    path.type.id == _type,
    path.brainLocation.layer.label == layer,
    path.brainLocation.brainRegion.label == brainRegion,
    path.distribution.encodingFormat == encodingFormat,
    limit=limit,
)
print(f"{len(data)} data of type '{_type}' found.")

# #### Display Results

# In[ ]:

reshaped_data = forge.reshape(data, keep=DATASET_KEEP)
forge.as_dataframe(reshaped_data[:DISPLAY_LIMIT])

# #### Download

# In[ ]:

forge.download(data, "distribution.contentUrl", dirpath)

# ### For Atlas Release

# In[ ]:

# Let's target the bbp/atlas project in Nexus
forge_atlas = KnowledgeGraphForge(
    "prod-forge-nexus.yml",
    bucket="bbp/atlas",
    token=TOKEN,
)

# Atlas related types:
# AtlasRelease
# CellPositions
# BrainParcellationDataLayer
# CellDensityDataLayer
# GeneExpressionVolumetricDataLayer
# GliaCellDensity
# NISSLImageDataLayer

# #### Set filters

# In[ ]:

# Supported filters for the time being are:
_type = "CellPositions"
limit = 10

# #### Run query

# In[ ]:

path = forge_atlas.paths("Dataset")  # to have autocompletion on the properties
data = forge_atlas.search(path.type.id == _type, limit=limit)
print(f"{len(data)} data of type '{_type}' found.")

# #### Display Results

# In[ ]:

# The atlas query keeps a shorter property list than the sscx ones: it has
# no "subject" or layer properties.
reshaped_data = forge_atlas.reshape(
    data,
    keep=[
        "id",
        "name",
        "brainLocation.brainRegion.id",
        "brainLocation.brainRegion.label",
        "contribution",
        "distribution.name",
        "distribution.contentUrl",
        "distribution.encodingFormat",
    ],
)
forge_atlas.as_dataframe(reshaped_data[:DISPLAY_LIMIT])

# #### Download

# In[ ]:

forge_atlas.download(data, "distribution.contentUrl", dirpath)