This notebook demonstrates how to convert Resources to a pandas DataFrame and back.
from kgforge.core import KnowledgeGraphForge
forge = KnowledgeGraphForge("../../configurations/demo-forge.yml")
from kgforge.core import Resource
import pandas as pd
import numpy as np
# Build two example Person resources: Jane has a nested PostalAddress and the
# placeholder email value "(missing)"; John has a real email and no address.
address = Resource(type="PostalAddress", country="Switzerland", locality="Geneva")
jane = Resource(id="33532569-70eb-4648-a7f1-f7ea22b0ce38", type="Person", name="Jane Doe", address=address, email="(missing)")
john = Resource(id="45e018f4-9ade-4ad0-bdcf-63902bf51cc1", type="Person", name="John Smith", email="john.smith@epfl.ch")
persons = [jane, john]
# Register both resources through the forge in a single call.
forge.register(persons)
<count> 2 <action> _register_one <succeeded> True
# Inspect Jane: the nested address is shown as a sub-resource.
print(jane)
{ id: 33532569-70eb-4648-a7f1-f7ea22b0ce38 type: Person address: { type: PostalAddress country: Switzerland locality: Geneva } email: (missing) name: Jane Doe }
# Inspect John: no address property is present.
print(john)
{ id: 45e018f4-9ade-4ad0-bdcf-63902bf51cc1 type: Person email: john.smith@epfl.ch name: John Smith }
# Store metadata attached to John after registration (version / deprecated).
print(john._store_metadata)
{'version': 1, 'deprecated': False}
# Default conversion: one row per resource; nested properties become dotted
# column names (address.type, ...) and absent properties show up as NaN.
forge.as_dataframe(persons)
 | id | type | address.type | address.country | address.locality | email | name |
---|---|---|---|---|---|---|---|
0 | 33532569-70eb-4648-a7f1-f7ea22b0ce38 | Person | PostalAddress | Switzerland | Geneva | (missing) | Jane Doe |
1 | 45e018f4-9ade-4ad0-bdcf-63902bf51cc1 | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith |
# na="(missing)": values equal to the placeholder are rendered as NaN
# (Jane's email in the output below).
forge.as_dataframe(persons, na="(missing)")
 | id | type | address.type | address.country | address.locality | email | name |
---|---|---|---|---|---|---|---|
0 | 33532569-70eb-4648-a7f1-f7ea22b0ce38 | Person | PostalAddress | Switzerland | Geneva | NaN | Jane Doe |
1 | 45e018f4-9ade-4ad0-bdcf-63902bf51cc1 | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith |
# nesting="__": nested properties are joined with "__" instead of "."
# in the column names (address__type, ...).
forge.as_dataframe(persons, nesting="__")
 | id | type | address__type | address__country | address__locality | email | name |
---|---|---|---|---|---|---|---|
0 | 33532569-70eb-4648-a7f1-f7ea22b0ce38 | Person | PostalAddress | Switzerland | Geneva | (missing) | Jane Doe |
1 | 45e018f4-9ade-4ad0-bdcf-63902bf51cc1 | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith |
# expanded=True: columns use the expanded form (@id, @type, schema:name)
# and the ids are shown as full file:// URIs in the output below.
forge.as_dataframe(persons, expanded=True)
 | @id | @type | schema:name |
---|---|---|---|
0 | file:///Users/agarcia/Developments/kgforge/exa... | schema:Person | Jane Doe |
1 | file:///Users/agarcia/Developments/kgforge/exa... | schema:Person | John Smith |
# store_metadata=True: the store metadata of each resource is appended as
# extra columns (deprecated, version).
forge.as_dataframe(persons, store_metadata=True)
 | id | type | address.type | address.country | address.locality | email | name | deprecated | version |
---|---|---|---|---|---|---|---|---|---|
0 | 33532569-70eb-4648-a7f1-f7ea22b0ce38 | Person | PostalAddress | Switzerland | Geneva | (missing) | Jane Doe | False | 1 |
1 | 45e018f4-9ade-4ad0-bdcf-63902bf51cc1 | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith | False | 1 |
# Hand-built flat table describing two persons: one row per person, nested
# address properties as dotted column names, NaN where there is no address,
# and the "(missing)" placeholder as Jane's email.
person_columns = [
    "id",
    "type",
    "address.type",
    "address.country",
    "address.locality",
    "email",
    "name",
]
person_rows = [
    (
        "33532569-70eb-4648-a7f1-f7ea22b0ce38",
        "Person",
        "PostalAddress",
        "Switzerland",
        "Geneva",
        "(missing)",
        "Jane Doe",
    ),
    (
        "45e018f4-9ade-4ad0-bdcf-63902bf51cc1",
        "Person",
        np.nan,
        np.nan,
        np.nan,
        "john.smith@epfl.ch",
        "John Smith",
    ),
]
# Each row is turned into a {column: value} record before being handed to
# the DataFrame constructor, so the column order follows person_columns.
data = pd.DataFrame([dict(zip(person_columns, row)) for row in person_rows])
data
 | id | type | address.type | address.country | address.locality | email | name |
---|---|---|---|---|---|---|---|
0 | 33532569-70eb-4648-a7f1-f7ea22b0ce38 | Person | PostalAddress | Switzerland | Geneva | (missing) | Jane Doe |
1 | 45e018f4-9ade-4ad0-bdcf-63902bf51cc1 | Person | NaN | NaN | NaN | john.smith@epfl.ch | John Smith |
# Convert the DataFrame back into a list of Resources.
resources = forge.from_dataframe(data)
# Rebuild the expected resources by hand and check that the conversion
# yields equal objects.
address = Resource(type="PostalAddress", country="Switzerland", locality="Geneva")
jane = Resource(id="33532569-70eb-4648-a7f1-f7ea22b0ce38", type="Person", name="Jane Doe", address=address, email="(missing)")
john = Resource(id="45e018f4-9ade-4ad0-bdcf-63902bf51cc1", type="Person", name="John Smith", email="john.smith@epfl.ch")
persons = [jane, john]
resources == persons
True
# Treat the "(missing)" placeholder as a missing value when converting back.
resources_na = forge.from_dataframe(data, na="(missing)")
# Without na, the placeholder is kept as Jane's email value.
print(resources[0])
{ id: 33532569-70eb-4648-a7f1-f7ea22b0ce38 type: Person address: { type: PostalAddress country: Switzerland locality: Geneva } email: (missing) name: Jane Doe }
# With na="(missing)", the email property is absent from the resource.
print(resources_na[0])
{ id: 33532569-70eb-4648-a7f1-f7ea22b0ce38 type: Person address: { type: PostalAddress country: Switzerland locality: Geneva } name: Jane Doe }
# Ask for "__" as the nesting separator; note that the columns of data are
# separated with "." instead.
resources_nesting = forge.from_dataframe(data, nesting="__")
# Without a nesting argument, the dotted columns were folded into a nested
# address resource.
print(resources[0])
{ id: 33532569-70eb-4648-a7f1-f7ea22b0ce38 type: Person address: { type: PostalAddress country: Switzerland locality: Geneva } email: (missing) name: Jane Doe }
# With nesting="__", the dotted column names do not contain the separator,
# so they stay flat properties (address.country, ...) on the resource.
print(resources_nesting[0])
{ id: 33532569-70eb-4648-a7f1-f7ea22b0ce38 type: Person address.country: Switzerland address.locality: Geneva address.type: PostalAddress email: (missing) name: Jane Doe }