#!/usr/bin/env python
# coding: utf-8

# # Tutorial: Integrate Neuroscience Datasets from Multiple Sources using MINDS

# ## Initialize and configure

# In[ ]:


# The `%pip` magic is used instead of a bare `pip` shell command so that the
# packages are installed into the environment of the running kernel rather
# than into whichever `pip` happens to come first on PATH.
get_ipython().run_line_magic('pip', 'install nexusforge==0.6.2')


# In[ ]:


get_ipython().run_line_magic('pip', 'install allensdk')


# In[ ]:


get_ipython().run_line_magic('pip', 'install neurom[plotly]==3.0.1')


# In[ ]:


get_ipython().run_line_magic('pip', 'install --upgrade nest-asyncio==1.5.1')


# ### Get an authentication token

# The [Nexus sandbox application](https://sandbox.bluebrainnexus.io/web) can be used to get a token:
#
# - Step 1: From the [web page](https://sandbox.bluebrainnexus.io/web), click on the login button in the top right corner and follow the instructions on screen.
#
# - Step 2: You will then see a `Copy token` button in the top right corner. Click on it to copy the token to the clipboard.
#
# Once a token is obtained, proceed to paste it as the value of the `TOKEN` variable below.
#
# __Important__: A Nexus token is valid for 8 hours. If your working session is open for more than 8 hours, you may need to refresh the value of the token and reinitialize the forge client in the _'Configure a forge client to store, manage and access datasets'_ section below.
# In[ ]:


import getpass


# In[ ]:


# Prompt for the Nexus token without echoing it to the screen.
TOKEN = getpass.getpass()


# ### Configure a forge client to store, manage and access datasets

# In[ ]:


import uuid
import base64
import requests
import json
from pathlib import Path
from kgforge.core import KnowledgeGraphForge
from kgforge.specializations.mappings import DictionaryMapping
from allensdk.api.queries.cell_types_api import CellTypesApi
from allensdk.core.cell_types_cache import CellTypesCache


# In[ ]:


# Download the JSON-LD context and store it locally as ./rdfmodel/jsonldcontext.json.
# NOTE(review): presumably the forge.yml configuration used below reads the
# context from this local path - confirm against that file.
context_response = requests.get('https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/rdfmodel/jsonldcontext.json')
# Fail here with a clear HTTP error instead of a confusing JSON decoding error
# (or a bogus context file) when the download does not succeed.
context_response.raise_for_status()
dirpath = './rdfmodel'
Path(dirpath).mkdir(parents=True, exist_ok=True)
with open(f'{dirpath}/jsonldcontext.json', 'w', encoding='utf-8') as outfile:
    json.dump(context_response.json(), outfile)


# In[ ]:


ORG = "github-users"
PROJECT = ""  # Provide here the automatically created project name created when you logged into the Nexus sandbox instance.


# In[ ]:


# The forge client targets the "<ORG>/<PROJECT>" bucket of the sandbox
# deployment and authenticates with the token entered above.
forge = KnowledgeGraphForge(
    "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/forge.yml",
    bucket=f"{ORG}/{PROJECT}",
    endpoint="https://sandbox.bluebrainnexus.io/v1",
    token=TOKEN,
)


# ## Download datasets from Allen Cell Types Database and MouseLight

# ### Download mouse neuron morphologies from the Allen Cell Types Database

# We will be downloading mouse neuron morphology data from the [Allen Cell Types Database](https://celltypes.brain-map.org/). The [AllenSDK](https://allensdk.readthedocs.io/en/latest/) can be used for data download.
# In[ ]:


ALLEN_DIR = "allen_cell_types_database"


# In[ ]:


# The cache is driven by a manifest file located under ALLEN_DIR.
ctc = CellTypesCache(manifest_file=f"{ALLEN_DIR}/manifest.json")


# In[ ]:


MAX_CELLS = 10  # Increase to include more cells
SPECIES = CellTypesApi.MOUSE


# In[ ]:


# Keep the identifiers of the first MAX_CELLS cells of the selected species
# that come with a reconstruction.
nm_allen_identifiers = [
    entry["id"]
    for entry in ctc.get_cells(species=[SPECIES], require_reconstruction=True)
][:MAX_CELLS]
print(f"Selected a mouse neuron with identifier: {nm_allen_identifiers}")


# Select metadata

# In[ ]:


with open(f"{ALLEN_DIR}/cells.json") as cells_file:
    allen_cell_types_metadata = json.load(cells_file)


# In[ ]:


# Retain only the metadata records whose specimen id was selected above.
nm_allen_metadata = []
for record in allen_cell_types_metadata:
    if record["specimen__id"] in nm_allen_identifiers:
        nm_allen_metadata.append(record)


# Download reconstruction files

# In[ ]:


for specimen_id in nm_allen_identifiers:
    ctc.get_reconstruction(specimen_id)


# ### Download mouse neuron electrophysiology recordings from the Allen Cell Types Database

# Download Electrophysiology recordings

# In[ ]:


for specimen_id in nm_allen_identifiers:
    ctc.get_ephys_data(specimen_id)


# ### Download mouse neuron morphologies from MouseLight project

# We will be downloading mouse neuron morphology data from the [MouseLight project](https://www.janelia.org/project-team/mouselight).
# In[ ]:


URL_GRAPHQL = "http://ml-neuronbrowser.janelia.org/graphql/"


# In[ ]:


URL_JSON = "http://ml-neuronbrowser.janelia.org/json/"


# In[ ]:


URL_SWC = "http://ml-neuronbrowser.janelia.org/swc/"


# Select metadata

# In[ ]:


# Search the MouseLight neuron browser through its GraphQL endpoint and keep
# the `idString` of every neuron returned.
nm_request = requests.post(URL_GRAPHQL, json={
    "operationName": "SearchNeurons",
    "variables": {
        "context": {
            "scope": 6,
            "nonce": "cjyo7xu7k00033h5yrj9jfpoy",
            "predicates": [{
                "predicateType": 3,
                "tracingIdsOrDOIs": ["1"],
                "tracingIdsOrDOIsExactMatch": False,
                "tracingStructureIds": ["68e76074-1777-42b6-bbf9-93a6a5f02fa4"],
                "nodeStructureIds": ["c37953e1-a1e9-4b9a-847e-08d9566ced65"],
                "operatorId": None,
                "amount": 0,
                "brainAreaIds": [],
                "arbCenter": {
                    "x": None,
                    "y": None,
                    "z": None},
                "arbSize": None,
                "invert": False,
                "composition": 3
            }]
        }
    },
    "query": """query SearchNeurons($context: SearchContext) {\n searchNeurons(context: $context) {\n totalCount\n queryTime\n nonce\n \n neurons {\n id\n idString\n tracings {\n id\n tracingStructure {\n id\n name\n value\n __typename\n }\n soma {\n id\n x\n y\n z\n radius\n parentNumber\n sampleNumber\n brainAreaIdCcfV30\n structureIdentifierId\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n"""
})
# Surface HTTP failures immediately rather than as a KeyError on the payload.
nm_request.raise_for_status()
nm_mouselight_graphql = nm_request.json()["data"]["searchNeurons"]["neurons"]
nm_mouselight_names = [x["idString"] for x in nm_mouselight_graphql]


# In[ ]:


# Headers shared by the JSON and SWC export requests below.
MOUSELIGHT_HEADERS = {"Accept": "*/*", "Content-Type": "application/json"}

# Fetch the metadata of the first MAX_CELLS neurons, one request per neuron.
nm_mouselight_metadata = list()
for name in nm_mouselight_names[0:MAX_CELLS]:
    metadata_response = requests.post(URL_JSON, json={"ids": [name]}, headers=MOUSELIGHT_HEADERS)
    metadata_response.raise_for_status()
    metadata_payload = json.loads(metadata_response.content.decode('utf-8'))
    nm_mouselight_metadata.append(metadata_payload["contents"])


# In[ ]:


# Add to the first neuron of each record an "allenLabel" key holding the name
# of the `allenInformation` entry whose `allenId` matches the soma's.
for nm in nm_mouselight_metadata:
    first_neuron = nm["neurons"][0]
    allenId = first_neuron["soma"]["allenId"]
    allenInfo = first_neuron["allenInformation"]
    # Reset for every neuron: without this, a neuron with no matching entry
    # would raise NameError or silently inherit the previous neuron's label.
    allenLabel = None
    for info in allenInfo:
        if info["allenId"] == allenId:
            allenLabel = info["name"]
    first_neuron["allenLabel"] = allenLabel


# Download reconstruction files

# In[ ]:


dirpath = './mouselight'
Path(dirpath).mkdir(parents=True, exist_ok=True)
for name in nm_mouselight_names[0:MAX_CELLS]:
    swc_response = requests.post(URL_SWC, json={"ids": [name]}, headers=MOUSELIGHT_HEADERS)
    swc_response.raise_for_status()
    # NOTE(security): this body used to be passed to eval(), which would run
    # arbitrary code sent by the remote server. It is parsed as JSON instead.
    # NOTE(review): assumes the endpoint answers with a JSON object - confirm.
    swc_payload = swc_response.json()
    # "contents" carries the SWC file encoded as base64 text.
    message_bytes = base64.b64decode(swc_payload["contents"])
    with open(f"{dirpath}/{name}.swc", "wb") as f:
        f.write(message_bytes)


# ## Mapping

# ### Map the Allen Cell Types Database neuron morphologies to Neuroshapes

# In[ ]:


allen_nm_mapping = DictionaryMapping.load("https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/allen_morphology_dataset.hjson")  # TODO
nm_allen_resources = forge.map(nm_allen_metadata, allen_nm_mapping)


# ### Map the Allen Cell Types Database neuron electrophysiology recordings to Neuroshapes

# In[ ]:


allen_ephys_mapping = DictionaryMapping.load("https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/allen_ephys_dataset.hjson")  # TODO
nephys_allen_resources = forge.map(nm_allen_metadata, allen_ephys_mapping)


# ### Map the MouseLight neuron morphologies to Neuroshapes

# In[ ]:


mouselight_nm_mapping = DictionaryMapping.load("https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/mouselight_dataset.hjson")  # TODO
nm_mouselight_resources = forge.map(nm_mouselight_metadata, mouselight_nm_mapping)


# ## Register
#
# If the registration fails, try refreshing the access token and reinitializing the forge client in the _'Configure a forge client to store, manage and access datasets'_ section.
# ### Register the Allen Cell Types Database neuron morphologies

# In[ ]:


# Give every resource a fresh UUID-based identifier before registering it.
for resource in nm_allen_resources:
    resource.id = forge.format("identifier", "neuronmorphologies", str(uuid.uuid4()))


# In[ ]:


forge.register(nm_allen_resources)


# ### Register the Allen Cell Types Database neuron electrophysiology recordings

# In[ ]:


for resource in nephys_allen_resources:
    resource.id = forge.format("identifier", "traces", str(uuid.uuid4()))


# In[ ]:


forge.register(nephys_allen_resources)


# ### Register the MouseLight neuron morphologies

# In[ ]:


for resource in nm_mouselight_resources:
    resource.id = forge.format("identifier", "neuronmorphologies", str(uuid.uuid4()))


# In[ ]:


forge.register(nm_mouselight_resources)


# ### Save the created resources in JSON files

# In[ ]:


dirpath = './database'
Path(dirpath).mkdir(parents=True, exist_ok=True)
with open(f"{dirpath}/mouselight-protocols.json", "w") as f:
    json.dump(forge.as_jsonld(nm_mouselight_resources, form="expanded"), f)


# In[ ]:


with open(f"{dirpath}/allen-morphologies-protocols.json", "w") as f:
    json.dump(forge.as_jsonld(nm_allen_resources, form="expanded"), f)


# In[ ]:


with open(f"{dirpath}/allen-ephys-protocols.json", "w") as f:
    json.dump(forge.as_jsonld(nephys_allen_resources, form="expanded"), f)


# ## Access

# ### Set filters

# In[ ]:


_type = "NeuronMorphology"
filters = {"type": _type}


# ### Run Query

# In[ ]:


number_of_results = 10  # You can limit the number of results, pass `None` to fetch all the results
data = forge.search(filters, limit=number_of_results)
print(f"{len(data)} dataset(s) of type {_type} found")


# ### Display the results as pandas dataframe

# In[ ]:


# Each property is listed once (the layer id/label pair used to be repeated).
property_to_display = [
    "id",
    "name",
    "subject",
    "brainLocation.brainRegion.id",
    "brainLocation.brainRegion.label",
    "brainLocation.layer.id",
    "brainLocation.layer.label",
    "contribution",
    "distribution.name",
    "distribution.contentUrl",
    "distribution.encodingFormat",
]
reshaped_data = forge.reshape(data, keep=property_to_display)
forge.as_dataframe(reshaped_data)


# ### Download

# In[ ]:


dirpath = "./downloaded/"
forge.download(data, "distribution.contentUrl", dirpath, overwrite=True)


# In[ ]:


# A bare `ls ./downloaded/` only works through IPython automagic and is a
# syntax error in a .py file, so the shell call is made explicit.
get_ipython().system('ls ./downloaded/')


# ### Display a result as 3D Neuron Morphology

# In[ ]:


from neurom import load_morphology
from neurom.view.plotly_impl import plot_morph3d
import IPython


# In[ ]:


# Join with pathlib: `dirpath` already ends with "/", so plain string
# formatting produced a doubled slash in the file path.
neuron = load_morphology(str(Path(dirpath) / data[0].distribution.name))
# NOTE(review): presumably plot_morph3d(inline=False) is what writes the
# ./morphology-3D.html file displayed on the next line - confirm.
plot_morph3d(neuron, inline=False)
IPython.display.HTML(filename='./morphology-3D.html')


# ## Version the dataset

# Tagging a dataset is equivalent to `git tag`. It allows you to version a dataset.

# In[ ]:


forge.tag(data, value="releaseV112")


# In[ ]:


# The version argument can be specified to retrieve the dataset at a given tag.
tagged_data = forge.retrieve(id=data[0].id, version="releaseV112")


# In[ ]:


forge.as_dataframe(tagged_data)


# In[ ]:


data[0].description = "Neuron Morphology from Allen"


# In[ ]:


forge.update(data[0])


# In[ ]:


# Without a version, the retrieve call is not pinned to the tag created above.
non_tagged_data = forge.retrieve(id=data[0].id)


# In[ ]:


forge.as_dataframe(non_tagged_data)


# In[ ]: