#!/usr/bin/env python
# coding: utf-8

# # Tutorial: Integrate Neuroscience Datasets from Multiple Sources using MINDS

# ## Initialize and configure

# In[ ]:


# Install the notebook's dependencies by shelling out to pip from the running
# IPython kernel (get_ipython() is only available inside IPython/Jupyter).
get_ipython().system('pip install nexusforge==0.6.2')


# In[ ]:


# NOTE(review): allensdk is the only package installed without a version pin,
# so the version that gets installed can change between runs.
get_ipython().system('pip install allensdk')


# In[ ]:


get_ipython().system('pip install neurom[plotly]==3.0.1')


# In[ ]:


get_ipython().system('pip install --upgrade nest-asyncio==1.5.1')


# ### Get an authentication token

# The [Nexus sandbox application](https://sandbox.bluebrainnexus.io/web) can be used to get a token:
# 
# - Step 1: From the [web page](https://sandbox.bluebrainnexus.io/web), click on the login button in the top right corner and follow the instructions on screen.
# 
# - Step 2: You will then see a `Copy token` button in the top right corner. Click on it to copy the token to the clipboard.
# 

# Once a token is obtained, proceed to paste it as the value of the `TOKEN` variable below.
# 
# __Important__: A Nexus token is valid for 8 hours. If your working session stays open for more than 8 hours, you may need to refresh the value of the token and reinitialize the forge client in the _'Configure a forge client to store, manage and access datasets'_ section below.

# In[ ]:


import getpass


# In[ ]:


# Prompt for the Nexus token without echoing it to the screen; TOKEN is later
# handed to the forge client as its `token` argument.
TOKEN = getpass.getpass()


# ### Configure a forge client to store, manage and access datasets

# In[ ]:


import uuid
import base64
import requests
import json
from pathlib import Path

from kgforge.core import KnowledgeGraphForge
from kgforge.specializations.mappings import DictionaryMapping

from allensdk.api.queries.cell_types_api import CellTypesApi
from allensdk.core.cell_types_cache import CellTypesCache


# In[ ]:


# Fetch the JSON-LD context file from the Nexus repository (pinned to a commit)
# and store a local copy under ./rdfmodel.
# NOTE(review): presumably the forge configuration loaded later reads this
# local copy - confirm against forge.yml.
r = requests.get('https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/rdfmodel/jsonldcontext.json')
# Surface HTTP failures (404, 5xx, ...) as an explicit error instead of a
# confusing JSON decoding error further down.
r.raise_for_status()
# Parse before opening the output file so that a malformed body does not leave
# an empty jsonldcontext.json behind.
jsonld_context = r.json()
dirpath = './rdfmodel'
Path(dirpath).mkdir(parents=True, exist_ok=True)
with open(f'{dirpath}/jsonldcontext.json', 'w') as outfile:
    json.dump(jsonld_context, outfile)


# In[ ]:


# Organization and project of the Nexus bucket; the forge client below is
# created with the bucket "<ORG>/<PROJECT>".
ORG = "github-users"
PROJECT = ""  # Set this to the name of the project that was created automatically when you logged into the Nexus sandbox instance.


# In[ ]:


# Create the forge session from the remote configuration file (pinned to a
# commit), targeting the sandbox endpoint and the "<ORG>/<PROJECT>" bucket and
# authenticating with TOKEN.
forge_config_url = "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/forge.yml"
forge_bucket = f"{ORG}/{PROJECT}"
forge = KnowledgeGraphForge(
    forge_config_url,
    bucket=forge_bucket,
    endpoint="https://sandbox.bluebrainnexus.io/v1",
    token=TOKEN,
)


# ## Download datasets from Allen Cell Types Database and MouseLight

# ### Download mouse neuron morphologies from the Allen Cell Types Database

# We will be downloading mouse neuron morphology data from the [Allen Cell Types Database](https://celltypes.brain-map.org/). The [AllenSDK](https://allensdk.readthedocs.io/en/latest/) can be used for data download.

# In[ ]:


# Local directory that holds the Allen Cell Types cache manifest; cells.json
# is later read from the same directory.
ALLEN_DIR = "allen_cell_types_database"


# In[ ]:


# Cache object used below to list cells and to fetch reconstructions and
# electrophysiology data.
ctc = CellTypesCache(manifest_file=f"{ALLEN_DIR}/manifest.json")


# In[ ]:


# MAX_CELLS caps the number of Allen cells selected below and is also used to
# slice the list of MouseLight neuron names.
MAX_CELLS = 10 # Increase to include more cells
SPECIES = CellTypesApi.MOUSE


# In[ ]:


# List the cells of the chosen species that have a reconstruction, then keep
# the identifiers of at most MAX_CELLS of them.
reconstructed_cells = ctc.get_cells(species=[SPECIES], require_reconstruction=True)
all_reconstructed_ids = [cell["id"] for cell in reconstructed_cells]
nm_allen_identifiers = all_reconstructed_ids[:MAX_CELLS]
print(f"Selected a mouse neuron with identifier: {nm_allen_identifiers}")


# Select metadata

# In[ ]:


# Load the cell metadata file that sits next to the cache manifest.
# NOTE(review): presumably cells.json is written by the CellTypesCache calls
# made earlier - confirm.
allen_cells_file = Path(f"{ALLEN_DIR}/cells.json")
allen_cell_types_metadata = json.loads(allen_cells_file.read_text())


# In[ ]:


# Keep only the metadata records of the selected cells. The identifiers are
# put in a set once so that each record is checked in constant time instead of
# scanning the identifier list for every record.
selected_allen_ids = set(nm_allen_identifiers)
nm_allen_metadata = [
    neuron
    for neuron in allen_cell_types_metadata
    if neuron["specimen__id"] in selected_allen_ids
]


# Download reconstruction files

# In[ ]:


# Request the reconstruction of every selected cell through the cache object;
# the return values are not used here.
for cell_id in nm_allen_identifiers:
    ctc.get_reconstruction(cell_id)


# ### Download mouse neuron electrophysiology recordings from the Allen Cell Types Database

# Download Electrophysiology recordings

# In[ ]:


# Request the electrophysiology data of every selected cell through the cache
# object; the return values are not used here.
for cell_id in nm_allen_identifiers:
    ctc.get_ephys_data(cell_id)


# ### Download mouse neuron morphologies from MouseLight project

# We will be downloading mouse neuron morphology data from the [MouseLight project](https://www.janelia.org/project-team/mouselight).

# In[ ]:


# MouseLight neuron browser endpoints used below.
# NOTE(review): all three are plain http URLs - confirm whether the service
# also offers https.

# Endpoint that receives the SearchNeurons GraphQL query.
URL_GRAPHQL = "http://ml-neuronbrowser.janelia.org/graphql/"


# In[ ]:


# Endpoint queried once per neuron name for its metadata.
URL_JSON = "http://ml-neuronbrowser.janelia.org/json/"


# In[ ]:


# Endpoint queried once per neuron name for its base64-encoded SWC content.
URL_SWC = "http://ml-neuronbrowser.janelia.org/swc/"


# Select metadata

# In[ ]:


# POST a "SearchNeurons" GraphQL operation to the MouseLight neuron browser.
# The payload carries the operation name, a `context` variable holding one
# search predicate, and the query text; the query asks for each neuron's id,
# idString and tracings (with tracing structure and soma fields).
# NOTE(review): the numeric codes (scope, predicateType, composition) and the
# UUIDs in the predicate are opaque here - presumably copied from a request
# made by the neuron browser web UI; confirm what they select.
nm_request = requests.post(URL_GRAPHQL, json={"operationName":"SearchNeurons",
                                "variables":{
                                    "context":{
                                        "scope":6,
                                        "nonce":"cjyo7xu7k00033h5yrj9jfpoy",
                                        "predicates":[{
                                            "predicateType":3,
                                            "tracingIdsOrDOIs":["1"],
                                            "tracingIdsOrDOIsExactMatch":False,
                                            "tracingStructureIds":["68e76074-1777-42b6-bbf9-93a6a5f02fa4"],
                                            "nodeStructureIds":["c37953e1-a1e9-4b9a-847e-08d9566ced65"],
                                            "operatorId":None,
                                            "amount":0,
                                            "brainAreaIds":[],
                                            "arbCenter":{
                                                "x":None,
                                                "y":None,
                                                "z":None},
                                            "arbSize":None,
                                            "invert":False,
                                            "composition":3
                                        }]
                                    }
                                },
                                "query":"""query SearchNeurons($context: SearchContext) {\n  searchNeurons(context: $context) 
                                            {\n    totalCount\n    queryTime\n    nonce\n    \n    neurons {\n      id\n      
                                            idString\n      tracings {\n        id\n        tracingStructure {\n          id\n          
                                            name\n          value\n          __typename\n        }\n        soma {\n          id\n          
                                            x\n          y\n          z\n          radius\n          parentNumber\n          
                                            sampleNumber\n          brainAreaIdCcfV30\n          structureIdentifierId\n          
                                            __typename\n        }\n        __typename\n      }\n      __typename\n    }\n    
                                            __typename\n  }\n}\n"""
                               })
# Fail with an explicit HTTP error if the search request was rejected, rather
# than with a JSON decoding error or KeyError on an error page.
nm_request.raise_for_status()
# Pull the list of neurons out of the GraphQL response and collect their
# idString values, which are used as neuron names in the following requests.
nm_mouselight_graphql = nm_request.json()["data"]["searchNeurons"]["neurons"]
nm_mouselight_names = [neuron["idString"] for neuron in nm_mouselight_graphql]


# In[ ]:


# Fetch the metadata of the first MAX_CELLS MouseLight neurons, one request
# per neuron name, and keep the "contents" member of each JSON response.
nm_mouselight_metadata = []
for name in nm_mouselight_names[:MAX_CELLS]:
    response = requests.post(
        URL_JSON,
        json={"ids": [name]},
        headers={"Accept": "*/*", "Content-Type": "application/json"},
    )
    # Stop on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()
    metadata = json.loads(response.content.decode('utf-8'))
    nm_mouselight_metadata.append(metadata["contents"])


# In[ ]:


# For the first neuron of every metadata record, store under "allenLabel" the
# `name` of the "allenInformation" entry whose "allenId" equals the soma's
# "allenId".
for nm in nm_mouselight_metadata:
    neuron = nm["neurons"][0]
    allen_id = neuron["soma"]["allenId"]
    # Reset for every neuron: without this, a neuron with no matching entry
    # would silently inherit the label of the previous neuron (or raise a
    # NameError if it is the first one). Such neurons now get None.
    allen_label = None
    for info in neuron["allenInformation"]:
        if info["allenId"] == allen_id:
            allen_label = info["name"]
    # `neuron` is the same dict object that lives inside
    # nm_mouselight_metadata, so the list is updated in place.
    neuron["allenLabel"] = allen_label


# Download reconstruction files

# In[ ]:


# Download the SWC reconstruction of the first MAX_CELLS MouseLight neurons
# and write each one to ./mouselight/<name>.swc.
# The output directory does not depend on the neuron, so create it once.
dirpath = './mouselight'
Path(dirpath).mkdir(parents=True, exist_ok=True)
for name in nm_mouselight_names[:MAX_CELLS]:
    response = requests.post(
        URL_SWC,
        json={"ids": [name]},
        headers={"Accept": "*/*", "Content-Type": "application/json"},
    )
    # Stop on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    # NOTE(security): this cell previously ran eval() on the raw response
    # body, which executes whatever Python expression the remote server (or
    # anyone tampering with the plain-http connection) returns. The body is
    # now parsed as JSON instead.
    # NOTE(review): assumes the endpoint answers with valid JSON, as the
    # metadata endpoint does - confirm.
    payload = json.loads(response.content)
    # "contents" holds the SWC file encoded as base64 text.
    message_bytes = base64.b64decode(payload["contents"])
    with open(f"{dirpath}/{name}.swc", "wb") as f:
        f.write(message_bytes)


# ## Mapping

# ### Map the Allen Cell Types Database neuron morphologies to Neuroshapes

# In[ ]:


# Load the mapping rules for Allen morphologies from the Nexus repository
# (pinned to a commit) and apply them to the selected Allen metadata records.
# TODO: carried over from the original cell, which did not say what remains to be done.
allen_nm_mapping_url = "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/allen_morphology_dataset.hjson"
allen_nm_mapping = DictionaryMapping.load(allen_nm_mapping_url)
nm_allen_resources = forge.map(nm_allen_metadata, allen_nm_mapping)


# ### Map the Allen Cell Types Database neuron electrophysiology recordings to Neuroshapes

# In[ ]:


# Load the mapping rules for Allen electrophysiology recordings from the Nexus
# repository (pinned to a commit). They are applied to the same Allen metadata
# records that the morphology mapping uses.
# TODO: carried over from the original cell, which did not say what remains to be done.
allen_ephys_mapping_url = "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/allen_ephys_dataset.hjson"
allen_ephys_mapping = DictionaryMapping.load(allen_ephys_mapping_url)
nephys_allen_resources = forge.map(nm_allen_metadata, allen_ephys_mapping)


# ### Map the MouseLight neuron morphologies to Neuroshapes

# In[ ]:


# Load the mapping rules for MouseLight morphologies from the Nexus repository
# (pinned to a commit) and apply them to the MouseLight metadata records.
# TODO: carried over from the original cell, which did not say what remains to be done.
mouselight_nm_mapping_url = "https://raw.githubusercontent.com/BlueBrain/nexus/ef830192d4e7bb95f9351c4bdab7b0114c27e2f0/docs/src/main/paradox/docs/getting-started/notebooks/mappings/mouselight_dataset.hjson"
mouselight_nm_mapping = DictionaryMapping.load(mouselight_nm_mapping_url)
nm_mouselight_resources = forge.map(nm_mouselight_metadata, mouselight_nm_mapping)


# ## Register
# 
# If the registration fails, try refreshing the access token and reinitializing the forge client in the _'Configure a forge client to store, manage and access datasets'_ section.

# ### Register the Allen Cell Types Database neuron morphologies

# In[ ]:


# Give every mapped Allen morphology its own identifier, built by the forge
# formatter from the "neuronmorphologies" segment and a random UUID.
for morphology in nm_allen_resources:
    fresh_uuid = str(uuid.uuid4())
    morphology.id = forge.format("identifier", "neuronmorphologies", fresh_uuid)


# In[ ]:


# Register the identified resources through the forge client.
forge.register(nm_allen_resources)


# ### Register the Allen Cell Types Database neuron electrophysiology recordings

# In[ ]:


# Give every mapped Allen electrophysiology resource its own identifier, built
# by the forge formatter from the "traces" segment and a random UUID.
for trace in nephys_allen_resources:
    fresh_uuid = str(uuid.uuid4())
    trace.id = forge.format("identifier", "traces", fresh_uuid)


# In[ ]:


# Register the identified resources through the forge client.
forge.register(nephys_allen_resources)


# ### Register the MouseLight neuron morphologies

# In[ ]:


# Give every mapped MouseLight morphology its own identifier, built by the
# forge formatter from the "neuronmorphologies" segment and a random UUID.
for morphology in nm_mouselight_resources:
    fresh_uuid = str(uuid.uuid4())
    morphology.id = forge.format("identifier", "neuronmorphologies", fresh_uuid)


# In[ ]:


# Register the identified resources through the forge client.
forge.register(nm_mouselight_resources)


# ### Save the created resources in JSON files

# In[ ]:


# Create ./database if needed and dump the MouseLight resources there as
# expanded JSON-LD. `dirpath` is reused by the two Allen dumps that follow.
dirpath = './database'
Path(dirpath).mkdir(parents=True, exist_ok=True)
mouselight_dump = Path(f"{dirpath}/mouselight-protocols.json")
with mouselight_dump.open("w") as out:
    json.dump(forge.as_jsonld(nm_mouselight_resources, form="expanded"), out)


# In[ ]:


# Dump the Allen morphology resources as expanded JSON-LD into the directory
# currently named by `dirpath`.
allen_morphologies_dump = Path(f"{dirpath}/allen-morphologies-protocols.json")
with allen_morphologies_dump.open("w") as out:
    json.dump(forge.as_jsonld(nm_allen_resources, form="expanded"), out)


# In[ ]:


# Dump the Allen electrophysiology resources as expanded JSON-LD into the
# directory currently named by `dirpath`.
allen_ephys_dump = Path(f"{dirpath}/allen-ephys-protocols.json")
with allen_ephys_dump.open("w") as out:
    json.dump(forge.as_jsonld(nephys_allen_resources, form="expanded"), out)


# ## Access

# ### Set filters

# In[ ]:


# Resource type to search for; it is also echoed in the result message
# printed after the search.
_type = "NeuronMorphology"

# Search filter passed to forge.search in the next cell.
filters = {"type": _type}


# ### Run Query

# In[ ]:


# Upper bound on the number of search hits; `None` fetches all the results.
number_of_results = 10

# Run the search with the filters defined above and report how many resources
# came back.
data = forge.search(filters, limit=number_of_results)
result_count = len(data)

print(f"{result_count} dataset(s) of type {_type} found")


# ### Display the results as pandas dataframe

# In[ ]:


# Properties kept when reshaping the search results for display. Each path is
# listed once: the original list repeated "brainLocation.layer.id" and
# "brainLocation.layer.label".
property_to_display = [
    "id",
    "name",
    "subject",
    "brainLocation.brainRegion.id",
    "brainLocation.brainRegion.label",
    "brainLocation.layer.id",
    "brainLocation.layer.label",
    "contribution",
    "distribution.name",
    "distribution.contentUrl",
    "distribution.encodingFormat",
]
reshaped_data = forge.reshape(data, keep=property_to_display)

# Last expression of the cell: the notebook renders the resulting dataframe.
forge.as_dataframe(reshaped_data)


# ### Download

# In[ ]:


# Download the files referenced by the "distribution.contentUrl" property of
# the search results into ./downloaded/. `dirpath` is reused further down to
# build the path of the morphology file to load.
dirpath = "./downloaded/"
forge.download(data, "distribution.contentUrl", dirpath, overwrite=True)


# In[ ]:


# List the downloaded files. The bare IPython automagic `ls ./downloaded/` is
# not valid Python and makes this exported script fail to parse, so the
# command goes through the IPython shell hook, as the pip installs at the top
# of the file do.
get_ipython().system('ls ./downloaded/')


# ### Display a result as 3D Neuron Morphology

# In[ ]:


from neurom import load_morphology
from neurom.view.plotly_impl import plot_morph3d
import IPython


# In[ ]:


# Load the morphology file of the first search result from the download
# directory and plot it in 3D.
morphology_file = f"{dirpath}/{data[0].distribution.name}"
neuron = load_morphology(morphology_file)
plot_morph3d(neuron, inline=False)
# NOTE(review): presumably plot_morph3d(inline=False) writes
# ./morphology-3D.html, which is then embedded here - confirm.
IPython.display.HTML(filename='./morphology-3D.html')


# ## Version the dataset
# Tagging a dataset is equivalent to `git tag`: it lets you version a dataset.

# In[ ]:


# Tag every search result with the release label; the same label is used
# below to retrieve the tagged version.
release_tag = "releaseV112"
forge.tag(data, value=release_tag)


# In[ ]:


# The version argument can be specified to retrieve the dataset at a given tag.

first_result_id = data[0].id
tagged_data = forge.retrieve(id=first_result_id, version=release_tag)


# In[ ]:


forge.as_dataframe(tagged_data)


# In[ ]:


# Set a description on the first search result and push the change through
# the forge client.
# NOTE(review): the search above filters on type only, so the first result is
# not necessarily an Allen morphology - confirm the description fits.
first_result = data[0]
first_result.description = "Neuron Morphology from Allen"


# In[ ]:


forge.update(first_result)


# In[ ]:


# Retrieve the same resource again, this time without a version argument.
non_tagged_data = forge.retrieve(id=first_result.id)


# In[ ]:


forge.as_dataframe(non_tagged_data)


# In[ ]: