This notebook demonstrates Nexus Forge data querying features.
from kgforge.core import KnowledgeGraphForge
A configuration file is needed in order to create a KnowledgeGraphForge session. A configuration can be generated using the notebook 00-Initialization.ipynb.
forge = KnowledgeGraphForge("../../configurations/forge.yml")
from kgforge.core import Resource
from kgforge.specializations.resources import Dataset
from kgforge.core.wrappings.paths import Filter, FilterOperator
jane = Resource(type="Person", name="Jane Doe", award=["Nobel"])
forge.register(jane)
<action> _register_one <succeeded> True
resource = forge.retrieve(jane.id)
resource == jane
True
jane = Resource(type="Person", name="Jane Doe", award=["Nobel"])
forge.register(jane)
<action> _register_one <succeeded> True
forge.tag(jane, "v1")
<action> _tag_one <succeeded> True
jane.email = ["jane.doe@epfl.ch", "jane.doe@example.org"]
forge.update(jane)
<action> _update_one <succeeded> True
# Print the revision of the resource. The attribute name depends on the
# configured store: DemoStore exposes `version`, BlueBrainNexusStore `_rev`.
# Catch only AttributeError — a bare `except:` would also swallow
# KeyboardInterrupt/SystemExit and hide unrelated bugs.
try:
    # DemoStore
    print(jane._store_metadata.version)
except AttributeError:
    # BlueBrainNexus
    print(jane._store_metadata._rev)
3
jane_v1 = forge.retrieve(jane.id, version=1)
jane_v1_tag = forge.retrieve(jane.id, version="v1")
jane_v1_rev = forge.retrieve(jane.id+"?rev=1")
jane_v1 == jane_v1_tag
True
jane_v1 == jane_v1_rev
True
jane_v1 != jane
True
# Same store-dependent metadata access as above, for the retrieved jane_v1.
# Narrow the handler to AttributeError: that is the only exception the
# DemoStore/BlueBrainNexus attribute difference can raise, and a bare
# `except:` would mask any other failure.
try:
    # DemoStore
    print(jane_v1._store_metadata.version)
except AttributeError:
    # BlueBrainNexus
    print(jane_v1._store_metadata._rev)
1
It is possible to retrieve resources stored in buckets other than the configured one. The configured store should of course support it.
resource = forge.retrieve(jane.id, cross_bucket=True) # cross_bucket defaults to False
resource._store_metadata
{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/91246fb9-3193-4c56-b9b4-8e88d828273c', '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json', '_createdAt': '2024-02-15T13:28:29.759Z', '_createdBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/ssssarah', '_deprecated': False, '_incoming': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c/incoming', '_outgoing': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c/outgoing', '_project': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/ssssarah', '_rev': 3, '_schemaProject': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/ssssarah', '_self': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c', '_updatedAt': '2024-02-15T13:28:29.960Z', '_updatedBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/ssssarah'}
resource._last_action
Action(error=None, message=None, operation='retrieve', succeeded=True)
resource._synchronized
True
One can also use the value of _self
from _store_metadata to retrieve a resource.
import copy
import string
import random
other_resource = copy.deepcopy(resource)
other_resource.id = f"https://my-incredible-id-{''.join(random.choices(string.digits, k=5))}"
forge.register(other_resource)
<action> _register_one <succeeded> True
url = other_resource._store_metadata['_self']
same_resource_url = forge.retrieve(id=url)
same_resource_id = forge.retrieve(id=other_resource.id)
Confirm they are the same
same_resource_id == same_resource_url
True
When using BlueBrainNexusStore, it is possible to retrieve resources' payload as they were registered (retrieve_source=True) without any changes related to store added metadata or JSONLD framing.
resource = forge.retrieve(jane.id, retrieve_source=False) # retrieve_source defaults to True
forge.as_json(resource)
{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/91246fb9-3193-4c56-b9b4-8e88d828273c', 'type': 'Person', 'award': 'Nobel', 'email': ['jane.doe@epfl.ch', 'jane.doe@example.org'], 'name': 'Jane Doe'}
resource._store_metadata
{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/91246fb9-3193-4c56-b9b4-8e88d828273c', '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json', '_createdAt': '2024-02-15T13:28:29.759Z', '_createdBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/ssssarah', '_deprecated': False, '_incoming': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c/incoming', '_outgoing': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c/outgoing', '_project': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/ssssarah', '_rev': 3, '_schemaProject': 'https://sandbox.bluebrainnexus.io/v1/projects/github-users/ssssarah', '_self': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/https:%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F91246fb9-3193-4c56-b9b4-8e88d828273c', '_updatedAt': '2024-02-15T13:28:29.960Z', '_updatedBy': 'https://sandbox.bluebrainnexus.io/v1/realms/github/users/ssssarah'}
resource._last_action
resource._synchronized
False
resource = forge.retrieve("123")
<action> catch_http_error <error> RetrievalError: 404 Client Error: Not Found for url: https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/%3A%2F%2F123
resource is None
True
Note: DemoModel and RdfModel schemas have not been synchronized yet. This section is to be run with RdfModel. Commented lines are for DemoModel.
jane = Resource(type="Person", name="Jane Doe")
contribution_jane = Resource(type="Contribution", agent=jane)
john = Resource(type="Person", name="John Smith")
contribution_john = Resource(type="Contribution", agent=john)
dataset = Dataset(forge, type="Dataset", contribution=[contribution_jane, contribution_john])
dataset.add_distribution("../../data/associations.tsv")
forge.register(dataset)
<action> _register_one <succeeded> True
forge.as_json(dataset)
{'id': 'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/e593f0b8-09f4-4438-b7a0-4024e06b47b6', 'type': 'Dataset', 'contribution': [{'type': 'Contribution', 'agent': {'type': 'Person', 'name': 'Jane Doe'}}, {'type': 'Contribution', 'agent': {'type': 'Person', 'name': 'John Smith'}}], 'distribution': {'type': 'DataDownload', 'atLocation': {'type': 'Location', 'store': {'id': 'https://bluebrain.github.io/nexus/vocabulary/diskStorageDefault', 'type': 'DiskStorage', '_rev': 1}}, 'contentSize': {'unitCode': 'bytes', 'value': 477}, 'contentUrl': 'https://sandbox.bluebrainnexus.io/v1/files/github-users/ssssarah/https%3A%2F%2Fsandbox.bluebrainnexus.io%2Fv1%2Fresources%2Fgithub-users%2Fssssarah%2F_%2F9863c9c1-6005-4634-9989-199ab4ed8bf1', 'digest': {'algorithm': 'SHA-256', 'value': '789aa07948683fe036ac29811814a826b703b562f7d168eb70dee1fabde26859'}, 'encodingFormat': 'text/tab-separated-values', 'name': 'associations.tsv'}}
The paths
method loads the template or property paths (i.e. expected properties) for a given type.
Please refer to the Modeling.ipynb notebook to learn about templates and types.
p = forge.paths("Dataset")
Autocompletion is enabled on p
and this can be used to create search filters.
Note: There is a known issue for RdfModel which requires using p.type.id
instead of p.type.
All python comparison operators are supported.
resources = forge.search(p.type.id=="Person", limit=3)
type(resources)
list
len(resources)
3
forge.as_dataframe(resources)
id | type | name | distribution.type | distribution.atLocation.type | distribution.atLocation.store.id | distribution.atLocation.store.type | distribution.atLocation.store._rev | distribution.contentSize.unitCode | distribution.contentSize.value | distribution.contentUrl | distribution.digest.algorithm | distribution.digest.value | distribution.encodingFormat | distribution.name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | John Smith | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | Jane Doe | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | Jane Doe | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1.0 | bytes | 52.0 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | 1dacd765946963fda4949753659089c5f532714b418d30... | text/csv | persons.csv |
forge.as_dataframe(resources, store_metadata=True)
id | type | name | _constrainedBy | _createdAt | _createdBy | _deprecated | _incoming | _outgoing | _project | ... | distribution.atLocation.store.id | distribution.atLocation.store.type | distribution.atLocation.store._rev | distribution.contentSize.unitCode | distribution.contentSize.value | distribution.contentUrl | distribution.digest.algorithm | distribution.digest.value | distribution.encodingFormat | distribution.name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | John Smith | https://bluebrain.github.io/nexus/schemas/unco... | 2024-02-13T13:04:27.840Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | Jane Doe | https://bluebrain.github.io/nexus/schemas/unco... | 2024-02-13T13:04:27.842Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | Jane Doe | https://bluebrain.github.io/nexus/schemas/unco... | 2024-02-13T13:04:28.270Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | ... | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1.0 | bytes | 52.0 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | 1dacd765946963fda4949753659089c5f532714b418d30... | text/csv | persons.csv |
3 rows × 27 columns
# Search results are not synchronized
resources[0]._synchronized
True
Property autocompletion is available on a path p
even for nested properties like p.contribution.
# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat
resources = forge.search(p.type.id == "Dataset", p.distribution.encodingFormat == "text/tab-separated-values", limit=3)
len(resources)
3
forge.as_dataframe(resources)
id | type | contribution | distribution.type | distribution.atLocation.type | distribution.atLocation.store.id | distribution.atLocation.store.type | distribution.atLocation.store._rev | distribution.contentSize.unitCode | distribution.contentSize.value | distribution.contentUrl | distribution.digest.algorithm | distribution.digest.value | distribution.encodingFormat | distribution.name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | Dataset | [{'type': 'Contribution', 'agent': {'type': 'P... | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1 | bytes | 477 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | 789aa07948683fe036ac29811814a826b703b562f7d168... | text/tab-separated-values | associations.tsv |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | Dataset | [{'type': 'Contribution', 'agent': {'type': 'P... | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1 | bytes | 477 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | 789aa07948683fe036ac29811814a826b703b562f7d168... | text/tab-separated-values | associations.tsv |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | Dataset | [{'type': 'Contribution', 'agent': {'type': 'P... | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1 | bytes | 477 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | 789aa07948683fe036ac29811814a826b703b562f7d168... | text/tab-separated-values | associations.tsv |
A dictionary can be provided for filters:
This feature is not supported when using the DemoStore
# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat
# and created a given dateTime (by default, dateTime values should be signaled by the suffix "^^xsd:dateTime")
filters = {
"type": "Dataset",
"distribution":{"encodingFormat":"text/tab-separated-values"},
"_createdAt":dataset._store_metadata._createdAt+"^^xsd:dateTime"
}
resources = forge.search(filters, limit=3)
type(resources)
list
len(resources)
0
forge.as_dataframe(resources, store_metadata=True)
[f"{op.value} ({op.name})" for op in FilterOperator] # These are equivalent to the Python comparison operators
['__eq__ (EQUAL)', '__ne__ (NOT_EQUAL)', '__lt__ (LOWER_THAN)', '__le__ (LOWER_OR_Equal_Than)', '__gt__ (GREATER_Than)', '__ge__ (GREATER_OR_Equal_Than)']
# Search for resources of type Dataset and with text/tab-separated-values as distribution.encodingFormat
# and created a given dateTime (dateTime values should be signaled by the suffix "^^xsd:dateTime")
filter_1 = Filter(operator=FilterOperator.EQUAL, path=["type"], value="Dataset")
filter_2 = Filter(operator=FilterOperator.EQUAL, path=["distribution","encodingFormat"], value="text/tab-separated-values")
filter_3 = Filter(operator=FilterOperator.LOWER_OR_Equal_Than, path=["_createdAt"], value=dataset._store_metadata._createdAt+"^^xsd:dateTime")
resources = forge.search(filter_1, filter_2, filter_3, limit=3)
type(resources)
list
len(resources)
3
forge.as_dataframe(resources, store_metadata=True)
id | type | contribution | distribution.type | distribution.atLocation.type | distribution.atLocation.store.id | distribution.atLocation.store.type | distribution.atLocation.store._rev | distribution.contentSize.unitCode | distribution.contentSize.value | ... | _createdBy | _deprecated | _incoming | _outgoing | _project | _rev | _schemaProject | _self | _updatedAt | _updatedBy | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | Dataset | [{'type': 'Contribution', 'agent': {'type': 'P... | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1 | bytes | 477 | ... | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | 1 | https://sandbox.bluebrainnexus.io/v1/projects/... | https://sandbox.bluebrainnexus.io/v1/resources... | 2024-02-02T09:47:31.662Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | Dataset | [{'type': 'Contribution', 'agent': {'type': 'P... | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1 | bytes | 477 | ... | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | 1 | https://sandbox.bluebrainnexus.io/v1/projects/... | https://sandbox.bluebrainnexus.io/v1/resources... | 2024-02-02T09:59:31.740Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | Dataset | [{'type': 'Contribution', 'agent': {'type': 'P... | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1 | bytes | 477 | ... | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | 1 | https://sandbox.bluebrainnexus.io/v1/projects/... | https://sandbox.bluebrainnexus.io/v1/resources... | 2024-02-02T15:21:32.898Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... |
3 rows × 27 columns
Two types of search endpoints are supported: 'sparql' (default) for graph queries and 'elastic' for document oriented queries. The types of available search endpoint can be configured (see 00-Initialization.ipynb for an example of search endpoints config) or set when creating a KnowledgeGraphForge session using the 'searchendpoints' arguments.
The search endpoint to hit when calling forge.search(...) is 'sparql' by default but can be specified using the 'search_endpoint' argument.
The data that is available through these search endpoints is limited to data indexed in specific indices that are configured through views. The project is set up with a default ElasticSearch view and a default SPARQL view, each indexing all data. You may define a view that targets a subset of the data based on some filters.
# Search for resources of type Person
filters = {"type": "Person"}
resources = forge.search(filters, limit=3, search_endpoint='sparql')
type(resources)
list
len(resources)
3
forge.as_dataframe(resources, store_metadata=True)
id | type | name | _constrainedBy | _createdAt | _createdBy | _deprecated | _incoming | _outgoing | _project | ... | distribution.atLocation.store.id | distribution.atLocation.store.type | distribution.atLocation.store._rev | distribution.contentSize.unitCode | distribution.contentSize.value | distribution.contentUrl | distribution.digest.algorithm | distribution.digest.value | distribution.encodingFormat | distribution.name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | John Smith | https://bluebrain.github.io/nexus/schemas/unco... | 2024-02-13T13:04:27.840Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | Jane Doe | https://bluebrain.github.io/nexus/schemas/unco... | 2024-02-13T13:04:27.842Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | Person | Jane Doe | https://bluebrain.github.io/nexus/schemas/unco... | 2024-02-13T13:04:28.270Z | https://sandbox.bluebrainnexus.io/v1/realms/gi... | False | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/resources... | https://sandbox.bluebrainnexus.io/v1/projects/... | ... | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1.0 | bytes | 52.0 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | 1dacd765946963fda4949753659089c5f532714b418d30... | text/csv | persons.csv |
3 rows × 27 columns
# Search for resources of type Person and retrieve only their ids and types.
filters = {"@type": "http://schema.org/Person"}
resources = forge.search(
filters, limit=3, search_endpoint='elastic', includes=["@id", "@type"]
) # fields can also be excluded with 'excludes'
type(resources)
list
len(resources)
3
forge.as_dataframe(resources, store_metadata=True)
id | type | |
---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | http://schema.org/Person |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | http://schema.org/Person |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | http://schema.org/Person |
# Search results are not synchronized
resources[0]._synchronized
False
resources[0].id
'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/e01663b6-604b-42db-b958-da9fbc1baca2'
resources[0].type
'http://schema.org/Person'
import requests
# Payload for creating an ElasticSearchView: index only resources of type
# http://schema.org/Person, mapping @id and @type as keyword fields.
keyword_mapping = {"type": "keyword"}
person_filter_pipeline = [
    {
        "name": "filterByType",
        "config": {"types": ["http://schema.org/Person"]},
    }
]
payload = {
    "@type": ["View", "ElasticSearchView"],
    "pipeline": person_filter_pipeline,
    "mapping": {
        "dynamic": True,
        "properties": {
            "@id": dict(keyword_mapping),
            "@type": dict(keyword_mapping),
        },
    },
}
# Target the views endpoint of the configured bucket (org/project).
url = f"{forge._store.endpoint}/views/{forge._store.bucket}"
# Reuse the forge session's bearer token to authenticate the raw HTTP call.
headers = {
"mode": "cors",
"Content-Type": "application/json",
"Accept": "application/ld+json, application/json",
"Authorization": "Bearer " + forge._store.token
}
response = requests.post(url=url, headers=headers, json=payload)
view_id = response.json()["@id"]
view_id
'https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/69311bb0-7509-4e39-933d-6ea1865ed9ed'
result_1 = forge.search({"@type": "Dataset"}, search_endpoint='elastic', view=view_id, cross_bucket=True, debug=True)
result_2 = forge.search({"@type": "Person"}, search_endpoint='elastic', view=view_id, cross_bucket=True, debug=True)
len(result_1), len(result_2)
Submitted query: {'query': {'bool': {'filter': [{'term': {'@type': 'Dataset'}}], 'must': [{'match': {'_deprecated': False}}], 'must_not': []}}, 'size': 100} Submitted query: {'query': {'bool': {'filter': [{'term': {'@type': 'Person'}}], 'must': [{'match': {'_deprecated': False}}], 'must_not': []}}, 'size': 100}
(0, 0)
Providing a view is a feature that is also available through the forge.elastic and forge.sparql calls.
It is possible to search for resources stored in buckets different than the configured one. The configured store should of course support it.
resources = forge.search(p.type.id == "Association", limit=3, cross_bucket=True) # cross_bucket defaults to False
type(resources)
list
len(resources)
3
forge.as_dataframe(resources)
id | type | agent.type | agent.gender.id | agent.gender.type | agent.gender.label | agent.name | distribution.type | distribution.atLocation.type | distribution.atLocation.store.id | distribution.atLocation.store.type | distribution.atLocation.store._rev | distribution.contentSize.unitCode | distribution.contentSize.value | distribution.contentUrl | distribution.digest.algorithm | distribution.digest.value | distribution.encodingFormat | distribution.name | name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | Association | Person | http://purl.obolibrary.org/obo/PATO_0000383 | LabeledOntologyEntity | female | Marie Curie | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1.0 | bytes | 46.0 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3... | text/plain | marie_curie.txt | Curie Association |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | Association | Person | http://purl.obolibrary.org/obo/PATO_0000384 | LabeledOntologyEntity | male | Albert Einstein | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1.0 | bytes | 50.0 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | 91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164... | text/plain | albert_einstein.txt | Einstein Association |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | Association | Person | NaN | NaN | NaN | Jane Doe | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# Furthermore, it is possible to filter by bucket when cross_bucket is set to True. Setting a bucket value when cross_bucket is False will trigger a not_supported exception.
resources = forge.search(p.type.id == "Person", limit=3, cross_bucket=True, bucket="dke/kgforge") # add a bucket
type(resources)
list
len(resources)
0
forge.as_dataframe(resources)
When using BlueBrainNexusStore, it is possible to retrieve resources' payload as they were registered (retrieve_source=True) without any changes related to store added metadata or JSONLD framing.
resources = forge.search(p.type.id == "Association", limit=3, retrieve_source=False) # retrieve_source defaults to True
type(resources)
list
len(resources)
3
forge.as_dataframe(resources)
id | type | agent.type | agent.gender.id | agent.gender.type | agent.gender.label | agent.name | distribution.type | distribution.atLocation.type | distribution.atLocation.store.id | distribution.atLocation.store.type | distribution.atLocation.store._rev | distribution.contentSize.unitCode | distribution.contentSize.value | distribution.contentUrl | distribution.digest.algorithm | distribution.digest.value | distribution.encodingFormat | distribution.name | name | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | Association | Person | http://purl.obolibrary.org/obo/PATO_0000383 | LabeledOntologyEntity | female | Marie Curie | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1.0 | bytes | 46.0 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | e0fe65f725bf28fe2b88c7bafb51fb5ef1df0ab14c68a3... | text/plain | marie_curie.txt | Curie Association |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | Association | Person | http://purl.obolibrary.org/obo/PATO_0000384 | LabeledOntologyEntity | male | Albert Einstein | DataDownload | Location | https://bluebrain.github.io/nexus/vocabulary/d... | DiskStorage | 1.0 | bytes | 50.0 | https://sandbox.bluebrainnexus.io/v1/files/git... | SHA-256 | 91a5ce5c84dc5bead730a4b49d0698b4aaef4bc06ce164... | text/plain | albert_einstein.txt | Einstein Association |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | Association | Person | NaN | NaN | NaN | Jane Doe | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
SPARQL is used as a query language to perform graph traversing.
Nexus Forge implements a SPARQL query rewriting strategy leveraging a configured RDFModel that lets users write SPARQL queries without adding prefix declarations, prefix names or long IRIs. With this strategy, only type and property names can be provided.
Please refer to the Modeling.ipynb notebook to learn about templates.
Note: DemoStore doesn't implement SPARQL operations yet. Please use another store for this section.
Note: DemoModel and RdfModel schemas have not been synchronized yet. This section is to be run with RdfModel.
jane = Resource(type="Person", name="Jane Doe")
contribution_jane = Resource(type="Contribution", agent=jane)
john = Resource(type="Person", name="John Smith")
contribution_john = Resource(type="Contribution", agent=john)
association = Resource(type="Dataset", contribution=[contribution_jane, contribution_john])
forge.register(association)
<action> _register_one <succeeded> True
forge.template("Dataset") # Templates help know which property to use when writing a query to serach for a given type
{ id: "" type: { id: "" } annotation: { id: "" type: Annotation hasBody: { id: "" type: { id: "" } label: "" note: "" } hasTarget: { id: "" type: AnnotationTarget } note: "" } brainLocation: { id: "" type: BrainLocation atlasSpatialReferenceSystem: { id: "" type: AtlasSpatialReferenceSystem } brainRegion: { id: "" label: "" } coordinatesInBrainAtlas: { id: "" valueX: 0.0 valueY: 0.0 valueZ: 0.0 } coordinatesInSlice: { spatialReferenceSystem: { id: "" type: SpatialReferenceSystem } valueX: 0.0 valueY: 0.0 valueZ: 0.0 } distanceToBoundary: { boundary: { id: "" label: "" } distance: { unitCode: "" value: [ 0.0 0 ] } } layer: { id: "" label: "" } longitudinalAxis: [ Dorsal Ventral ] positionInLayer: [ Deep Superficial ] } contribution: { id: "" } distribution: { id: "" type: DataDownload contentSize: { unitCode: "" value: [ 0.0 0 ] } digest: { algorithm: "" value: "" } encodingFormat: "" license: "" name: "" } objectOfStudy: { id: "" type: ObjectOfStudy } releaseDate: 9999-12-31T00:00:00 subject: { id: "" type: Subject } }
When a forge RDFModel is configured, then there is no need to provide prefixes and namespaces when writing a SPARQL query. Prefixes and namespaces will be automatically inferred from the provided schemas and/or JSON-LD context and the query rewritten accordingly.
query = """
SELECT ?id ?name ?contributor
WHERE {
?id a Dataset ;
contribution/agent ?contributor.
?contributor name ?name.
}
"""
resources = forge.sparql(query, limit=3)
type(resources)
list
len(resources)
3
print(resources[0])
{ id: https://sandbox.bluebrainnexus.io/v1/resources/github-users/ssssarah/_/ca722ef8-2295-4cf7-a35b-3c1cf3037232 contributor: t3532 name: John Smith }
forge.as_dataframe(resources)
id | contributor | name | |
---|---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | t3532 | John Smith |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | t3533 | Jane Doe |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | t3549 | John Smith |
resources = forge.sparql(query, limit=3, debug=True)
Submitted query: PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX dcat: <http://www.w3.org/ns/dcat#> PREFIX dcterms: <http://purl.org/dc/terms/> PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/> PREFIX nsg: <https://neuroshapes.org/> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX prov: <http://www.w3.org/ns/prov#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX schema: <http://schema.org/> PREFIX sh: <http://www.w3.org/ns/shacl#> PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#> PREFIX skos: <http://www.w3.org/2004/02/skos/core#> PREFIX vann: <http://purl.org/vocab/vann/> PREFIX void: <http://rdfs.org/ns/void#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX : <https://neuroshapes.org/> SELECT ?id ?name ?contributor WHERE { ?id a schema:Dataset ; nsg:contribution/prov:agent ?contributor. ?contributor schema:name ?name. } LIMIT 3
Regular SPARQL queries can also be provided. When provided, the limit and offset arguments supersede any in-query limit or offset values.
# A complete, syntactically valid SPARQL query: rewriting is disabled
# ('rewrite=False'), yet the limit/offset arguments of forge.sparql still
# override the LIMIT/OFFSET written inside the query text.
sparql_query = """
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>
PREFIX nsg: <https://neuroshapes.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <http://schema.org/>
PREFIX sh: <http://www.w3.org/ns/shacl#>
PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX vann: <http://purl.org/vocab/vann/>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX : <https://neuroshapes.org/>
SELECT ?id ?name
WHERE {
?id a schema:Dataset ;
nsg:contribution/prov:agent ?contributor.
?contributor schema:name ?name.
}
ORDER BY ?id
LIMIT 1
OFFSET 0
"""
resources = forge.sparql(sparql_query, rewrite=False, limit=3, offset=1, debug=True)
Submitted query: PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX dcat: <http://www.w3.org/ns/dcat#> PREFIX dcterms: <http://purl.org/dc/terms/> PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/> PREFIX nsg: <https://neuroshapes.org/> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX prov: <http://www.w3.org/ns/prov#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX schema: <http://schema.org/> PREFIX sh: <http://www.w3.org/ns/shacl#> PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#> PREFIX skos: <http://www.w3.org/2004/02/skos/core#> PREFIX vann: <http://purl.org/vocab/vann/> PREFIX void: <http://rdfs.org/ns/void#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX : <https://neuroshapes.org/> SELECT ?id ?name WHERE { ?id a schema:Dataset ; nsg:contribution/prov:agent ?contributor. ?contributor schema:name ?name. } ORDER BY ?id LIMIT 3 OFFSET 1
type(resources)
list
len(resources)
3
type(resources[0])
kgforge.core.resource.Resource
forge.as_dataframe(resources)
id | name | |
---|---|---|
0 | https://sandbox.bluebrainnexus.io/v1/resources... | Jane Doe |
1 | https://sandbox.bluebrainnexus.io/v1/resources... | John Smith |
2 | https://sandbox.bluebrainnexus.io/v1/resources... | Jane Doe |
To avoid assigning any limit or offset, pass None to those parameters.
# The same query without a LIMIT/OFFSET clause: passing limit=None and
# offset=None tells forge.sparql not to add any, so every match is returned.
unbounded_query = """
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/>
PREFIX nsg: <https://neuroshapes.org/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX schema: <http://schema.org/>
PREFIX sh: <http://www.w3.org/ns/shacl#>
PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX vann: <http://purl.org/vocab/vann/>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX : <https://neuroshapes.org/>
SELECT ?id ?name
WHERE {
?id a schema:Dataset ;
nsg:contribution/prov:agent ?contributor.
?contributor schema:name ?name.
}
ORDER BY ?id
"""
resources = forge.sparql(unbounded_query, rewrite=False, limit=None, offset=None, debug=True)
Submitted query: PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX dcat: <http://www.w3.org/ns/dcat#> PREFIX dcterms: <http://purl.org/dc/terms/> PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/> PREFIX nsg: <https://neuroshapes.org/> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX prov: <http://www.w3.org/ns/prov#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX schema: <http://schema.org/> PREFIX sh: <http://www.w3.org/ns/shacl#> PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#> PREFIX skos: <http://www.w3.org/2004/02/skos/core#> PREFIX vann: <http://purl.org/vocab/vann/> PREFIX void: <http://rdfs.org/ns/void#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX : <https://neuroshapes.org/> SELECT ?id ?name WHERE { ?id a schema:Dataset ; nsg:contribution/prov:agent ?contributor. ?contributor schema:name ?name. } ORDER BY ?id
len(resources)
108
If the query is missing its context (the PREFIX declarations), keep the rewrite parameter at its default value (True) so that the missing prefixes are added and the terms are expanded automatically, as shown below.
# A query with no PREFIX declarations and unprefixed terms: with the default
# rewrite=True, forge.sparql injects the configured prefixes and expands the
# terms (e.g. Dataset -> schema:Dataset) before submitting.
contextless_query = """
SELECT ?id ?name ?contributor
WHERE {
?id a Dataset ;
contribution/agent ?contributor.
?contributor name ?name.
}
"""
resources = forge.sparql(contextless_query, limit=None, debug=True)
Submitted query: PREFIX dc: <http://purl.org/dc/elements/1.1/> PREFIX dcat: <http://www.w3.org/ns/dcat#> PREFIX dcterms: <http://purl.org/dc/terms/> PREFIX mba: <http://api.brain-map.org/api/v2/data/Structure/> PREFIX nsg: <https://neuroshapes.org/> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX prov: <http://www.w3.org/ns/prov#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX schema: <http://schema.org/> PREFIX sh: <http://www.w3.org/ns/shacl#> PREFIX shsh: <http://www.w3.org/ns/shacl-shacl#> PREFIX skos: <http://www.w3.org/2004/02/skos/core#> PREFIX vann: <http://purl.org/vocab/vann/> PREFIX void: <http://rdfs.org/ns/void#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> PREFIX : <https://neuroshapes.org/> SELECT ?id ?name ?contributor WHERE { ?id a schema:Dataset ; nsg:contribution/prov:agent ?contributor. ?contributor schema:name ?name. }
len(resources)
108
ElasticSearch DSL can be used as a query language to search for resources, provided that the configured store supports it. The 'BlueBrainNexusStore' supports ElasticSearch.
Note: DemoStore doesn't implement ElasticSearch DSL operations.
# Register a Dataset carrying two contributions, each one pointing to a
# Person resource as its agent.
jane = Resource(type="Person", name="Jane Doe")
john = Resource(type="Person", name="John Smith")
jane_contribution = Resource(type="Contribution", agent=jane)
john_contribution = Resource(type="Contribution", agent=john)
association = Resource(type="Dataset", contribution=[jane_contribution, john_contribution])
forge.register(association)
<action> _register_one <succeeded> True
# ElasticSearch DSL query: fetch only the '@id' and 'name' fields of
# resources whose '@type' is http://schema.org/Dataset.
query = """
{
"_source": {
"includes": [
"@id",
"name"
]
},
"query": {
"term": {
"@type": "http://schema.org/Dataset"
}
}
}
"""
# limit and offset (when provided in this method call) supersede the 'size' and 'from' values provided in the query
resources = forge.elastic(query, limit=3)
type(resources)
list
len(resources)
3
type(resources[0])
kgforge.core.resource.Resource
forge.as_dataframe(resources)
id | name | |
---|---|---|
0 | https://bbp.epfl.ch/neurosciencegraph/data/neu... | AA1543 |
1 | https://bbp.epfl.ch/neurosciencegraph/data/neu... | AA1542 |
2 | https://bbp.epfl.ch/neurosciencegraph/data/neu... | AA1544 |
# With as_resource=False, the raw ElasticSearch hits (dicts with '_id',
# '_index', '_score', '_source', ...) are returned instead of Resource objects.
resources_2 = forge.elastic(query, limit=3, as_resource=False)
type(resources_2[0])
dict
resources_2[0]
{'_id': 'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/25ca99ad-495d-4962-99de-9d4abbc5521c', '_index': 'nexus_7018fa21-cf03-4b16-b603-db111f6b8527_1', '_score': 1.5287807, '_source': {'@id': 'https://bbp.epfl.ch/neurosciencegraph/data/neuronmorphologies/25ca99ad-495d-4962-99de-9d4abbc5521c', 'name': 'AA1543'}}
Note: DemoStore doesn't implement file operations yet. Please use another store for this section.
# A fresh Person resource, used below as the agent of an Association.
jane = Resource(type="Person", name="Jane Doe")
! ls -p ../../data | egrep -v /$
associations.tsv my_data.xwz my_data_derived.txt persons-with-id.csv persons.csv tfidfvectorizer_model_schemaorg_linking
# Attach every file under ../../data as the distribution of a new
# Association resource, then register it.
attached_files = forge.attach("../../data")
association = Resource(type="Association", agent=jane, distribution=attached_files)
forge.register(association)
<action> _register_one <succeeded> True
# By default, the downloadable file urls are collected from the json path "distribution.contentUrl" (follow="distribution.contentUrl") and
# the files are downloaded in the current path (path=".").
# The argument overwrite: bool can be provided to decide whether to overwrite (True) existing files with the same name or
# to create new ones (False) with their names suffixed with a timestamp.
# A cross_bucket argument can be provided to download data from the configured bucket (cross_bucket=False - the default value)
# or from a bucket different from the configured one (cross_bucket=True). The configured store should support crossing buckets for this to work.
forge.download(association)
# A specific content type can be selected for download.
forge.download(association, content_type="text/tab-separated-values")
# The urls of the files to download can be collected from a different json path (by setting a value for "follow") and
# the files downloaded to a different path (by setting a value for "path").
forge.download(association, follow="distribution.contentUrl", path="./downloaded/")
! ls -l ./downloaded/
total 6352 -rw-r--r--@ 1 mouffok 10067 477 Oct 20 10:54 associations.tsv -rw-r--r--@ 1 mouffok 10067 477 Nov 23 15:10 associations.tsv.20231123151033 -rw-r--r--@ 1 mouffok 10067 477 Nov 23 15:10 associations.tsv.20231123151035 -rw-r--r-- 1 mouffok 10067 477 Nov 23 15:41 associations.tsv.20231123154107 -rw-r--r--@ 1 mouffok 10067 477 Nov 27 14:20 associations.tsv.20231127142049 -rw-r--r--@ 1 mouffok 10067 477 Nov 27 14:20 associations.tsv.20231127142050 -rw-r--r--@ 1 mouffok 10067 477 Nov 27 14:24 associations.tsv.20231127142458 -rw-r--r--@ 1 mouffok 10067 477 Dec 5 15:37 associations.tsv.20231205153754 -rw-r--r--@ 1 mouffok 10067 477 Dec 5 15:44 associations.tsv.20231205154444 -rw-r--r-- 1 mouffok 10067 477 Feb 1 18:40 associations.tsv.20240201184048 -rw-r--r-- 1 mouffok 10067 477 Feb 2 10:47 associations.tsv.20240202104741 -rw-r--r-- 1 mouffok 10067 477 Feb 2 10:59 associations.tsv.20240202105941 -rw-r--r-- 1 mouffok 10067 477 Feb 2 16:41 associations.tsv.20240202164100 -rw-r--r-- 1 mouffok 10067 477 Feb 5 18:34 associations.tsv.20240205183409 -rw-r--r-- 1 mouffok 10067 477 Feb 5 18:34 associations.tsv.20240205183410 -rw-r--r-- 1 mouffok 10067 477 Feb 5 18:34 associations.tsv.20240205183417 -rw-r--r-- 1 mouffok 10067 477 Feb 5 18:36 associations.tsv.20240205183603 -rw-r--r-- 1 mouffok 10067 477 Feb 6 16:09 associations.tsv.20240206160920 -rw-r--r--@ 1 mouffok 10067 477 Feb 13 14:16 associations.tsv.20240213141617 -rw-r--r-- 1 mouffok 10067 477 Feb 14 16:14 associations.tsv.20240214161402 -rw-r--r-- 1 mouffok 10067 477 Feb 15 14:28 associations.tsv.20240215142837 -rw-r--r--@ 1 mouffok 10067 16 Oct 20 10:54 my_data.xwz -rw-r--r-- 1 mouffok 10067 16 Nov 23 15:41 my_data.xwz.20231123154107 -rw-r--r--@ 1 mouffok 10067 16 Nov 27 14:24 my_data.xwz.20231127142458 -rw-r--r--@ 1 mouffok 10067 16 Dec 5 15:37 my_data.xwz.20231205153754 -rw-r--r--@ 1 mouffok 10067 16 Dec 5 15:44 my_data.xwz.20231205154444 -rw-r--r-- 1 mouffok 10067 16 Feb 1 18:40 my_data.xwz.20240201184048 
-rw-r--r-- 1 mouffok 10067 16 Feb 2 10:47 my_data.xwz.20240202104741 -rw-r--r-- 1 mouffok 10067 16 Feb 2 10:59 my_data.xwz.20240202105941 -rw-r--r-- 1 mouffok 10067 16 Feb 2 16:41 my_data.xwz.20240202164100 -rw-r--r-- 1 mouffok 10067 16 Feb 5 18:36 my_data.xwz.20240205183603 -rw-r--r-- 1 mouffok 10067 16 Feb 6 16:09 my_data.xwz.20240206160920 -rw-r--r--@ 1 mouffok 10067 16 Feb 13 14:16 my_data.xwz.20240213141617 -rw-r--r-- 1 mouffok 10067 16 Feb 14 16:14 my_data.xwz.20240214161402 -rw-r--r-- 1 mouffok 10067 16 Feb 15 14:28 my_data.xwz.20240215142837 -rw-r--r--@ 1 mouffok 10067 24 Oct 20 10:54 my_data_derived.txt -rw-r--r-- 1 mouffok 10067 24 Nov 23 15:41 my_data_derived.txt.20231123154107 -rw-r--r--@ 1 mouffok 10067 24 Nov 27 14:24 my_data_derived.txt.20231127142458 -rw-r--r--@ 1 mouffok 10067 24 Dec 5 15:37 my_data_derived.txt.20231205153754 -rw-r--r--@ 1 mouffok 10067 24 Dec 5 15:44 my_data_derived.txt.20231205154444 -rw-r--r-- 1 mouffok 10067 24 Feb 1 18:40 my_data_derived.txt.20240201184048 -rw-r--r-- 1 mouffok 10067 24 Feb 2 10:47 my_data_derived.txt.20240202104741 -rw-r--r-- 1 mouffok 10067 24 Feb 2 10:59 my_data_derived.txt.20240202105941 -rw-r--r-- 1 mouffok 10067 24 Feb 2 16:41 my_data_derived.txt.20240202164100 -rw-r--r-- 1 mouffok 10067 24 Feb 5 18:36 my_data_derived.txt.20240205183603 -rw-r--r-- 1 mouffok 10067 24 Feb 6 16:09 my_data_derived.txt.20240206160920 -rw-r--r--@ 1 mouffok 10067 24 Feb 13 14:16 my_data_derived.txt.20240213141617 -rw-r--r-- 1 mouffok 10067 24 Feb 14 16:14 my_data_derived.txt.20240214161402 -rw-r--r-- 1 mouffok 10067 24 Feb 15 14:28 my_data_derived.txt.20240215142837 -rw-r--r--@ 1 mouffok 10067 126 Oct 20 10:54 persons-with-id.csv -rw-r--r-- 1 mouffok 10067 126 Nov 23 15:41 persons-with-id.csv.20231123154107 -rw-r--r--@ 1 mouffok 10067 126 Nov 27 14:24 persons-with-id.csv.20231127142458 -rw-r--r--@ 1 mouffok 10067 126 Dec 5 15:37 persons-with-id.csv.20231205153754 -rw-r--r--@ 1 mouffok 10067 126 Dec 5 15:44 
persons-with-id.csv.20231205154444 -rw-r--r-- 1 mouffok 10067 126 Feb 1 18:40 persons-with-id.csv.20240201184048 -rw-r--r-- 1 mouffok 10067 126 Feb 2 10:47 persons-with-id.csv.20240202104741 -rw-r--r-- 1 mouffok 10067 126 Feb 2 10:59 persons-with-id.csv.20240202105941 -rw-r--r-- 1 mouffok 10067 126 Feb 2 16:41 persons-with-id.csv.20240202164100 -rw-r--r-- 1 mouffok 10067 126 Feb 5 18:36 persons-with-id.csv.20240205183603 -rw-r--r-- 1 mouffok 10067 126 Feb 6 16:09 persons-with-id.csv.20240206160920 -rw-r--r--@ 1 mouffok 10067 126 Feb 13 14:16 persons-with-id.csv.20240213141617 -rw-r--r-- 1 mouffok 10067 126 Feb 14 16:14 persons-with-id.csv.20240214161402 -rw-r--r-- 1 mouffok 10067 126 Feb 15 14:28 persons-with-id.csv.20240215142837 -rw-r--r--@ 1 mouffok 10067 52 Oct 20 10:54 persons.csv -rw-r--r--@ 1 mouffok 10067 52 Nov 23 15:10 persons.csv.20231123151035 -rw-r--r-- 1 mouffok 10067 52 Nov 23 15:41 persons.csv.20231123154107 -rw-r--r--@ 1 mouffok 10067 52 Nov 27 14:20 persons.csv.20231127142050 -rw-r--r--@ 1 mouffok 10067 52 Nov 27 14:24 persons.csv.20231127142458 -rw-r--r--@ 1 mouffok 10067 52 Dec 5 15:37 persons.csv.20231205153754 -rw-r--r--@ 1 mouffok 10067 52 Dec 5 15:44 persons.csv.20231205154444 -rw-r--r-- 1 mouffok 10067 52 Feb 1 18:40 persons.csv.20240201184048 -rw-r--r-- 1 mouffok 10067 52 Feb 2 10:47 persons.csv.20240202104741 -rw-r--r-- 1 mouffok 10067 52 Feb 2 10:59 persons.csv.20240202105941 -rw-r--r-- 1 mouffok 10067 52 Feb 2 16:41 persons.csv.20240202164100 -rw-r--r-- 1 mouffok 10067 52 Feb 5 18:34 persons.csv.20240205183417 -rw-r--r-- 1 mouffok 10067 52 Feb 5 18:36 persons.csv.20240205183603 -rw-r--r-- 1 mouffok 10067 52 Feb 6 16:09 persons.csv.20240206160920 -rw-r--r--@ 1 mouffok 10067 52 Feb 13 14:16 persons.csv.20240213141617 -rw-r--r-- 1 mouffok 10067 52 Feb 14 16:14 persons.csv.20240214161402 -rw-r--r-- 1 mouffok 10067 52 Feb 15 14:28 persons.csv.20240215142837 -rw-r--r--@ 1 mouffok 10067 204848 Oct 20 10:54 
tfidfvectorizer_model_schemaorg_linking -rw-r--r-- 1 mouffok 10067 204848 Nov 23 15:41 tfidfvectorizer_model_schemaorg_linking.20231123154107 -rw-r--r--@ 1 mouffok 10067 204848 Nov 27 14:24 tfidfvectorizer_model_schemaorg_linking.20231127142458 -rw-r--r--@ 1 mouffok 10067 204848 Dec 5 15:37 tfidfvectorizer_model_schemaorg_linking.20231205153754 -rw-r--r--@ 1 mouffok 10067 204848 Dec 5 15:44 tfidfvectorizer_model_schemaorg_linking.20231205154444 -rw-r--r-- 1 mouffok 10067 204848 Feb 1 18:40 tfidfvectorizer_model_schemaorg_linking.20240201184048 -rw-r--r-- 1 mouffok 10067 204848 Feb 2 10:47 tfidfvectorizer_model_schemaorg_linking.20240202104741 -rw-r--r-- 1 mouffok 10067 204848 Feb 2 10:59 tfidfvectorizer_model_schemaorg_linking.20240202105941 -rw-r--r-- 1 mouffok 10067 204848 Feb 2 16:41 tfidfvectorizer_model_schemaorg_linking.20240202164100 -rw-r--r-- 1 mouffok 10067 204848 Feb 5 18:36 tfidfvectorizer_model_schemaorg_linking.20240205183603 -rw-r--r-- 1 mouffok 10067 204848 Feb 6 16:09 tfidfvectorizer_model_schemaorg_linking.20240206160920 -rw-r--r--@ 1 mouffok 10067 204848 Feb 13 14:16 tfidfvectorizer_model_schemaorg_linking.20240213141617 -rw-r--r-- 1 mouffok 10067 204848 Feb 14 16:14 tfidfvectorizer_model_schemaorg_linking.20240214161402 -rw-r--r-- 1 mouffok 10067 204848 Feb 15 14:28 tfidfvectorizer_model_schemaorg_linking.20240215142837
#! rm -R ./downloaded/