Note: If you are on Binder, you don't need to execute the following command.
!pip install nexusforge[linking_sklearn]
This notebook presents a set of configuration options to set when creating a knowledge graph forge session. Refer to the Nexus Forge docs to learn more about all the possible configuration options.
# Forge configuration, filled in section by section by the cells below.
config = {}
This configuration is for testing Nexus Forge features without using or deploying a persistent store. Not all features are accessible with the demo configuration. The demo configuration is therefore not recommended for production use.
# Demo model: the model data is read from a local directory.
config["Model"] = dict(
    name="DemoModel",
    origin="directory",
    source="../../../tests/data/demo-model/",
)
# Demo store; versioned identifiers are rendered from the resource id and the
# version recorded in its store metadata.
config["Store"] = dict(
    name="DemoStore",
    versioned_id_template="{x.id}?_version={x._store_metadata.version}",
)
# Resolvers are grouped by scope; each scope maps to a list of resolver
# configurations. The scopes are filled in one at a time below.
config["Resolvers"] = {}

# Scope "terms": DemoResolver over the "sexontology" target (bucket sex.json).
config["Resolvers"]["terms"] = [
    {
        "resolver": "DemoResolver",
        "origin": "directory",
        "source": "../../../tests/data/demo-resolver/",
        "targets": [
            {"identifier": "sexontology", "bucket": "sex.json"},
        ],
        "result_resource_mapping": "../../configurations/demo-resolver/term-to-resource-mapping.hjson",
    },
]

# Scope "entities": DemoResolver over the "agents" target (bucket agents.json).
config["Resolvers"]["entities"] = [
    {
        "resolver": "DemoResolver",
        "origin": "directory",
        "source": "../../../tests/data/demo-resolver/",
        "targets": [
            {"identifier": "agents", "bucket": "agents.json"},
        ],
        "result_resource_mapping": "../../configurations/demo-resolver/entity-to-resource-mapping.hjson",
    },
]

# Scope "schemaorg": resolver class taken from the kgentitylinkingsklearn
# package (presumably provided by the linking_sklearn extra - confirm).
config["Resolvers"]["schemaorg"] = [
    {
        "resolver": "EntityLinkerSkLearn from kgentitylinkingsklearn",
        "origin": "directory",
        "source": "../../data/",
        "targets": [
            {
                "identifier": "terms",
                "bucket": "tfidfvectorizer_model_schemaorg_linking",
            },
        ],
        "result_resource_mapping": "../../configurations/entitylinking-resolver/entitylinking-mapper.hjson",
    },
]
The Nexus sandbox application can be used to login and get a token.
import getpass

# Ask for the access token interactively so it is not hard-coded in the notebook.
token = getpass.getpass()

endpoint = "https://sandbox.bluebrainnexus.io/v1"
org = "github-users"
# Provide here the automatically created project name corresponding to your
# Github login when you logged in the Nexus sandbox instance.
project = "mfsy"
This model supports the W3C SHACL schema language. Let us use example SHACL schemas from https://github.com/INCF/neuroshapes. SHACL schemas can be loaded either from a directory or from a store.
# Bucket holding the SHACL schemas; also used for the context below.
shacl_schema_bucket = "neurosciencegraph/datamodels"

# RdfModel with schemas loaded from a store (BlueBrainNexus).
config["Model"] = dict(
    name="RdfModel",
    origin="store",
    source="BlueBrainNexus",
    context=dict(
        iri="https://bbp.neuroshapes.org",
        bucket=shacl_schema_bucket,
    ),
)
For the following tutorials please keep the following Model configuration:
neuroshapes_path = "../../models/neuroshapes"
! rm -Rf $neuroshapes_path
! git clone https://github.com/INCF/neuroshapes.git $neuroshapes_path
Cloning into '../../models/neuroshapes'... remote: Enumerating objects: 8445, done. remote: Counting objects: 100% (30/30), done. remote: Compressing objects: 100% (27/27), done. remote: Total 8445 (delta 11), reused 7 (delta 2), pack-reused 8415 Receiving objects: 100% (8445/8445), 7.59 MiB | 17.78 MiB/s, done. Resolving deltas: 100% (4199/4199), done.
! cp -R $neuroshapes_path/shapes/neurosciencegraph/datashapes/core/dataset $neuroshapes_path/shapes/neurosciencegraph/commons/
! cp -R $neuroshapes_path/shapes/neurosciencegraph/datashapes/core/activity $neuroshapes_path/shapes/neurosciencegraph/commons/
! cp -R $neuroshapes_path/shapes/neurosciencegraph/datashapes/core/entity $neuroshapes_path/shapes/neurosciencegraph/commons/
! cp -R $neuroshapes_path/shapes/neurosciencegraph/datashapes/core/ontology $neuroshapes_path/shapes/neurosciencegraph/commons/
! cp -R $neuroshapes_path/shapes/neurosciencegraph/datashapes/core/person $neuroshapes_path/shapes/neurosciencegraph/commons/
! cp -R $neuroshapes_path/shapes/neurosciencegraph/datashapes/core/contribution $neuroshapes_path/shapes/neurosciencegraph/commons/
# RdfModel with schemas loaded from a directory under neuroshapes_path and a
# context read from a local JSON file.
config["Model"] = dict(
    name="RdfModel",
    origin="directory",
    source=f"{neuroshapes_path}/shapes/neurosciencegraph/commons/",
    context=dict(iri="../../models/neuroshapes_context.json"),
)
# BlueBrainNexus store configuration. The endpoint, bucket (org/project) and
# token come from the variables set earlier in the notebook.
config["Store"] = {
    "name": "BlueBrainNexus",
    "endpoint": endpoint,
    # SPARQL and Elasticsearch search endpoints.
    "searchendpoints": {
        "sparql": {
            "endpoint": "https://bluebrain.github.io/nexus/vocabulary/defaultSparqlIndex",
        },
        "elastic": {
            "endpoint": "https://bluebrain.github.io/nexus/vocabulary/defaultElasticSearchIndex",
        },
    },
    "bucket": f"{org}/{project}",
    "token": token,
    "vocabulary": {
        "metadata": {
            "iri": "https://bluebrain.github.io/nexus/contexts/metadata.json",
            "local_iri": "https://bluebrainnexus.io/contexts/metadata.json",
        },
        "namespace": "https://bluebrain.github.io/nexus/vocabulary/",
        "deprecated_property": "https://bluebrain.github.io/nexus/vocabulary/deprecated",
        "project_property": "https://bluebrain.github.io/nexus/vocabulary/project",
    },
    "max_connection": 50,
    "versioned_id_template": "{x.id}?rev={x._store_metadata._rev}",
    "file_resource_mapping": "../../configurations/nexus-store/file-to-resource-mapping.hjson",
}
# Bucket used as the target of the store-backed "terms" resolver.
ontology_bucket = "neurosciencegraph/datamodels"

# Resolvers are grouped by scope and filled in one scope at a time.
config["Resolvers"] = {}

# Scope "terms": OntologyResolver backed by the BlueBrainNexus store.
config["Resolvers"]["terms"] = [
    {
        "resolver": "OntologyResolver",
        "origin": "store",
        "source": "BlueBrainNexus",
        "targets": [
            {"identifier": "sexontology", "bucket": ontology_bucket},
        ],
        "result_resource_mapping": "../../configurations/nexus-resolver/term-to-resource-mapping.hjson",
    },
]

# Scope "entities": DemoResolver over the "agents" target (bucket agents.json).
config["Resolvers"]["entities"] = [
    {
        "resolver": "DemoResolver",
        "origin": "directory",
        "source": "../../../tests/data/demo-resolver/",
        "targets": [
            {"identifier": "agents", "bucket": "agents.json"},
        ],
        "result_resource_mapping": "../../configurations/demo-resolver/entity-to-resource-mapping.hjson",
    },
]

# Scope "schemaorg": resolver class taken from the kgentitylinkingsklearn package.
config["Resolvers"]["schemaorg"] = [
    {
        "resolver": "EntityLinkerSkLearn from kgentitylinkingsklearn",
        "origin": "directory",
        "source": "../../data/",
        "targets": [
            {
                "identifier": "terms",
                "bucket": "tfidfvectorizer_model_schemaorg_linking",
            },
        ],
        "result_resource_mapping": "../../configurations/entitylinking-resolver/entitylinking-mapper.hjson",
    },
]
# Directory-backed resolver set. This assignment replaces whatever value
# config["Resolvers"] held before; scopes are then filled in one at a time.
config["Resolvers"] = {}

# Scope "terms": DemoResolver over the "sexontology" target (bucket sex.json).
config["Resolvers"]["terms"] = [
    {
        "resolver": "DemoResolver",
        "origin": "directory",
        "source": "../../../tests/data/demo-resolver/",
        "targets": [
            {"identifier": "sexontology", "bucket": "sex.json"},
        ],
        "result_resource_mapping": "../../configurations/demo-resolver/term-to-resource-mapping.hjson",
    },
]

# Scope "ontology": DemoResolver over the "cells" target (bucket cell_types.json).
config["Resolvers"]["ontology"] = [
    {
        "resolver": "DemoResolver",
        "origin": "directory",
        "source": "../../../tests/data/demo-resolver/",
        "targets": [
            {"identifier": "cells", "bucket": "cell_types.json"},
        ],
        "result_resource_mapping": "../../configurations/demo-resolver/term-to-resource-mapping.hjson",
    },
]

# Scope "entities": DemoResolver over the "agents" target (bucket agents.json).
config["Resolvers"]["entities"] = [
    {
        "resolver": "DemoResolver",
        "origin": "directory",
        "source": "../../../tests/data/demo-resolver/",
        "targets": [
            {"identifier": "agents", "bucket": "agents.json"},
        ],
        "result_resource_mapping": "../../configurations/demo-resolver/entity-to-resource-mapping.hjson",
    },
]

# Scope "schemaorg": resolver class taken from the kgentitylinkingsklearn package.
config["Resolvers"]["schemaorg"] = [
    {
        "resolver": "EntityLinkerSkLearn from kgentitylinkingsklearn",
        "origin": "directory",
        "source": "../../data/",
        "targets": [
            {
                "identifier": "terms",
                "bucket": "tfidfvectorizer_model_schemaorg_linking",
            },
        ],
        "result_resource_mapping": "../../configurations/entitylinking-resolver/entitylinking-mapper.hjson",
    },
]
# Named string templates; "identifier" has two positional "{}" placeholders.
config["Formatters"] = {"identifier": "https://kg.example.ch/{}/{}"}
import yaml

# Persist the assembled configuration as YAML.
# NOTE(review): if the Store section carries a "token" entry, it is written to
# forge.yml in plain text - keep that file out of version control.
# safe_dump emits only standard YAML tags, so the file stays loadable by safe
# loaders; the explicit encoding makes the output independent of the locale.
with open("../../configurations/forge.yml", "w", encoding="utf-8") as f:
    yaml.safe_dump(config, f)