#!/usr/bin/env python
# coding: utf-8

# # Allen dataset integration
#
# This notebook demonstrates how to integrate Allen datasets into the Blue Brain Knowledge Graph.
#
# The tasks to be demonstrated are the following:
#
# 1. Configuration
# 2. Retrieve human neuron morphologies from the Allen Cell Types Database
# 3. Load the complete metadata of the neuron morphologies from Allen
# 4. Load the transformation Mappings
# 5. Map the neuron morphologies from Allen to the Neuroshapes Models
# 6. Add the created entities from Allen to Nexus
# 7. Retrieve the created entities

# ## 1. Session configuration

# In[1]:


import getpass

import allensdk
from kgforge.core import KnowledgeGraphForge
from kgforge.version import __version__


# Check versions

# In[2]:


print("Allensdk is", allensdk.__version__, ", and Nexus Fogre is", __version__)


# Please enter your BBP token:

# In[3]:


# Prompt for the token interactively so that no credential is ever stored in
# this file. Never paste a bearer token into the notebook source: anyone with
# read access to the file (or its version-control history) could reuse it.
token = getpass.getpass()


# Note: Initializing the forge may take a few seconds if the source is a directory

# In[4]:


forge = KnowledgeGraphForge("../../configurations/demo-forge-nexus-neuroshapes.yml", token=token)


# ## 2. Retrieve human neuron morphologies from the Allen Cell Types Database

# In[5]:


from allensdk.core.cell_types_cache import CellTypesCache
from allensdk.api.queries.cell_types_api import CellTypesApi


# ### 2.A Downloaded files: Specify a directory where to download Allen files

# In[6]:


ALLEN_DIR = "allen_cell_types_database"
ctc = CellTypesCache(manifest_file=f"{ALLEN_DIR}/manifest.json")


# In[7]:


human_cells = ctc.get_cells(species=[CellTypesApi.HUMAN], require_reconstruction=True)


# In[8]:


get_ipython().system('ls allen_cell_types_database')


# ### 2.B Pick a subset of cells to integrate

# In[9]:


len(human_cells)


# In[13]:


FROM = 8
TO = 10
human_cell_ids = [x["id"] for x in human_cells][FROM:TO]


# In[14]:


human_cell_ids


# ### 2.C Check that the picked cells are not already integrated by trying to retrieve them with the Forge
#
# For the picked cells, we check that they are not already integrated. If they are already integrated,
# pick another couple of ids for human_cell_ids in step 2.B.
#
# Note that the `forge.format` method is used to create the identifier of the patched cells that we
# attempt to retrieve from Nexus.

# In[15]:


for id_ in human_cell_ids:
    kg_id = forge.format("identifier", "patchedcells", id_)
    print(kg_id)
    resource = forge.retrieve(kg_id)
    if resource:
        print("> already integrated")


# In[16]:


human_cell_reconstructions = [ctc.get_reconstruction(x) for x in human_cell_ids]


# ## 3. Load the complete metadata of the neuron morphologies from Allen

# In[17]:


import json


# In[18]:


# Explicit encoding: without it, open() falls back to a platform-dependent default.
with open(f"{ALLEN_DIR}/cells.json", encoding="utf-8") as f:
    allen_cell_types_metadata = json.load(f)

# Keep only the metadata records of the cells picked in step 2.B.
human_cell_metadata = [x for x in allen_cell_types_metadata if x["specimen__id"] in human_cell_ids]


# Have a look at a single record

# In[19]:


human_cell_metadata[0]


# ## 4. Load the transformation Mappings

# The forge has a Dictionary Mapper that uses mapping files which provide the required transformation
# from one dictionary to another dictionary. The Dictionary Mappings are HJSON files containing the
# required transformations. This notebook has three mappings for: Subject, Patched Cell and Neuron Morphology.

# In[20]:


get_ipython().system('ls -l ../../mappings/allen-database-mappings')


# In[21]:


DIR = "../../mappings/allen-database-mappings"
subject_mapping_file = f"{DIR}/Subject.hjson"
patched_cel_mapping_file = f"{DIR}/PatchedCell.hjson"
neuronmorphology_mapping_file = f"{DIR}/NeuronMorphology.hjson"


# In[22]:


from kgforge.specializations.mappings import DictionaryMapping


# In[23]:


subject_mapping = DictionaryMapping.load(subject_mapping_file)
patchedcell_mapping = DictionaryMapping.load(patched_cel_mapping_file)
neuronmorphology_mapping = DictionaryMapping.load(neuronmorphology_mapping_file)


# The content of one of the mapping files is shown next:

# In[24]:


print(subject_mapping)


# Inside mapping files, it is possible to use methods from the Forge such as:
#
# - forge.format : used to format a string using a preconfigured string format (used previously in 2.C)
# - forge.resolve: used to retrieve identifiers using a string that is part of the name of the desired resource
#
# An example of the resolver is shown next:

# In[25]:


from kgforge.core.commons.strategies import ResolvingStrategy


# Check available resolvers

# In[26]:


forge.resolvers()


# Resolve the identifier for male in the terms scope and sex as target

# In[27]:


print(forge.resolve("male", scope="terms", target="sex"))


# ## 5. Map the neuron morphologies from Allen to the Neuroshapes Models
#
# It is possible to provide a list of mappings to be applied to a single dataset.

# In[30]:


mappings = [subject_mapping, patchedcell_mapping, neuronmorphology_mapping]


# In[31]:


resources = forge.map(human_cell_metadata, mappings)


# Check the created resources

# In[32]:


len(resources)


# In[33]:


print(resources[2])


# ## 6. Register the created resources from Allen to Nexus

# In[34]:


forge.register(resources)


# In[35]:


print(resources[1])


# ## 7. Retrieve the created entities
#
# If you know the exact ID you can just retrieve the resource as done in 2.C, or you can use the
# `search()` method. To search for a resource you can start by picking a Type among the available types.

# To create a search based on the `PatchedCell` structure, use the paths() method, which loads the
# structure of the given type into a Python object whose fields can be accessed using auto-completion.
# Next, the `p` object will hold the properties of `PatchedCell` and can be used to create a search.

# In[36]:


p = forge.paths("PatchedCell")


# In[37]:


results = forge.search(p.type == "PatchedCell")


# In[38]:


len(results)


# In[39]:


DISPLAY_LIMIT = 25


# In[40]:


forge.as_dataframe(results[:DISPLAY_LIMIT])


# In[41]:


# Clean up the files downloaded from the Allen Cell Types Database in step 2.A.
get_ipython().system(' rm -R allen_cell_types_database')


# In[ ]: