from kgforge.core import KnowledgeGraphForge
A configuration file is needed in order to create a KnowledgeGraphForge session. A configuration can be generated using the notebook 00-Initialization.ipynb.
forge = KnowledgeGraphForge("../../configurations/forge.yml")
import pandas as pd
import numpy as np
from kgforge.core import Resource
# Build two example Person resources. Jane carries a nested PostalAddress
# resource and the placeholder e-mail "(missing)", which the `na` examples
# further down replace by NaN; John has no address at all.
address = Resource(type="PostalAddress", country="Switzerland", locality="Geneva")
jane = Resource(type="Person", name="Jane Doe", address=address, email="(missing)")
john = Resource(type="Person", name="John Smith", email="john.smith@epfl.ch")
persons = [jane, john]
# Register both resources with a single call on the forge session.
forge.register(persons)
<count> 2 <action> _register_many <succeeded> True
forge.as_json(jane)
{'id': 'https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/99105664-6d99-45a5-90e8-82f58e45f36a', 'type': 'Person', 'address': {'type': 'PostalAddress', 'country': 'Switzerland', 'locality': 'Geneva'}, 'email': '(missing)', 'name': 'Jane Doe'}
forge.as_json(john)
{'id': 'https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/9b4976dc-6fb8-49fb-892d-a634d27eac3b', 'type': 'Person', 'email': 'john.smith@epfl.ch', 'name': 'John Smith'}
john._store_metadata
{'id': 'https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/9b4976dc-6fb8-49fb-892d-a634d27eac3b', '_constrainedBy': 'https://bluebrain.github.io/nexus/schemas/unconstrained.json', '_createdAt': '2022-04-12T22:24:14.009Z', '_createdBy': 'https://bbp.epfl.ch/nexus/v1/realms/bbp/users/sy', '_deprecated': False, '_incoming': 'https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/9b4976dc-6fb8-49fb-892d-a634d27eac3b/incoming', '_outgoing': 'https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/9b4976dc-6fb8-49fb-892d-a634d27eac3b/outgoing', '_project': 'https://bbp.epfl.ch/nexus/v1/projects/dke/kgforge', '_rev': 1, '_schemaProject': 'https://bbp.epfl.ch/nexus/v1/projects/dke/kgforge', '_self': 'https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/9b4976dc-6fb8-49fb-892d-a634d27eac3b', '_updatedAt': '2022-04-12T22:24:14.009Z', '_updatedBy': 'https://bbp.epfl.ch/nexus/v1/realms/bbp/users/sy'}
forge.as_dataframe(persons)
 | id | type | address.type | address.country | address.locality | email | name |
---|---|---|---|---|---|---|---|
0 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | Person | PostalAddress | Switzerland | Geneva | (missing) | Jane Doe |
1 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith |
It is possible to specify which values (here `'(missing)'`) should be replaced by NaN using the `na` parameter.
forge.as_dataframe(persons, na="(missing)")
 | id | type | address.type | address.country | address.locality | email | name |
---|---|---|---|---|---|---|---|
0 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | Person | PostalAddress | Switzerland | Geneva | NaN | Jane Doe |
1 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith |
It is possible to specify the string used in column names to denote nested values with the `nesting` parameter; the default is a dot (`.`).
forge.as_dataframe(persons, nesting="__")
 | id | type | address__type | address__country | address__locality | email | name |
---|---|---|---|---|---|---|---|
0 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | Person | PostalAddress | Switzerland | Geneva | (missing) | Jane Doe |
1 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith |
The `expanded` parameter will show fields and values according to the JSON-LD context.
forge.as_dataframe(persons, expanded=True)
 | @id | @type | http://schema.org/address | http://schema.org/email | http://schema.org/name |
---|---|---|---|---|---|
0 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | [http://schema.org/Person] | [{'@type': ['https://bbp.epfl.ch/nexus/v1/reso... | [{'@value': '(missing)'}] | [{'@value': 'Jane Doe'}] |
1 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | [http://schema.org/Person] | NaN | [{'@value': 'john.smith@epfl.ch'}] | [{'@value': 'John Smith'}] |
forge.as_dataframe(persons, store_metadata=True)
 | id | type | address.type | address.country | address.locality | email | name | _constrainedBy | _createdAt | _createdBy | _deprecated | _incoming | _outgoing | _project | _rev | _schemaProject | _self | _updatedAt | _updatedBy |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | Person | PostalAddress | Switzerland | Geneva | (missing) | Jane Doe | https://bluebrain.github.io/nexus/schemas/unco... | 2022-04-12T22:24:14.013Z | https://bbp.epfl.ch/nexus/v1/realms/bbp/users/sy | False | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | https://bbp.epfl.ch/nexus/v1/projects/dke/kgforge | 1 | https://bbp.epfl.ch/nexus/v1/projects/dke/kgforge | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | 2022-04-12T22:24:14.013Z | https://bbp.epfl.ch/nexus/v1/realms/bbp/users/sy |
1 | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith | https://bluebrain.github.io/nexus/schemas/unco... | 2022-04-12T22:24:14.009Z | https://bbp.epfl.ch/nexus/v1/realms/bbp/users/sy | False | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | https://bbp.epfl.ch/nexus/v1/projects/dke/kgforge | 1 | https://bbp.epfl.ch/nexus/v1/projects/dke/kgforge | https://bbp.epfl.ch/nexus/v1/resources/dke/kgf... | 2022-04-12T22:24:14.009Z | https://bbp.epfl.ch/nexus/v1/realms/bbp/users/sy |
# Flattened table of the two example persons, written column by column.
# Nested address fields use dotted column names; John has no address, so
# his address cells are NaN. Jane's e-mail holds the "(missing)" placeholder.
data = pd.DataFrame(
    {
        "type": ["Person", "Person"],
        "address.type": ["PostalAddress", np.nan],
        "address.country": ["Switzerland", np.nan],
        "address.locality": ["Geneva", np.nan],
        "email": ["(missing)", "john.smith@epfl.ch"],
        "name": ["Jane Doe", "John Smith"],
    }
)
data
 | type | address.type | address.country | address.locality | email | name |
---|---|---|---|---|---|---|
0 | Person | PostalAddress | Switzerland | Geneva | (missing) | Jane Doe |
1 | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith |
# Convert the DataFrame rows into resources using the default settings.
resources = forge.from_dataframe(data)
# Rebuild the same two persons by hand to serve as the expected result.
address = Resource(type="PostalAddress", country="Switzerland", locality="Geneva")
jane = Resource(type="Person", name="Jane Doe", address=address, email="(missing)")
john = Resource(type="Person", name="John Smith", email="john.smith@epfl.ch")
persons = [jane, john]
# Compare the resources built from the DataFrame with the hand-built ones.
resources == persons
True
resources_na = forge.from_dataframe(data, na="(missing)")
print(resources[0])
{ type: Person address: { type: PostalAddress country: Switzerland locality: Geneva } email: (missing) name: Jane Doe }
print(resources_na[0])
{ type: Person address: { type: PostalAddress country: Switzerland locality: Geneva } name: Jane Doe }
resources_nesting = forge.from_dataframe(data, nesting=".")
print(resources_nesting[0])
{ type: Person address: { type: PostalAddress country: Switzerland locality: Geneva } email: (missing) name: Jane Doe }
# Same two persons as before, written column by column, but with nested
# address fields named using "_" instead of "." as the separator.
# John has no address, so his address cells are NaN.
data = pd.DataFrame(
    {
        "type": ["Person", "Person"],
        "address_type": ["PostalAddress", np.nan],
        "address_country": ["Switzerland", np.nan],
        "address_locality": ["Geneva", np.nan],
        "email": ["(missing)", "john.smith@epfl.ch"],
        "name": ["Jane Doe", "John Smith"],
    }
)
resources_nesting = forge.from_dataframe(data, nesting="_")
print(resources_nesting[0])
{ type: Person address: { type: PostalAddress country: Switzerland locality: Geneva } email: (missing) name: Jane Doe }