#!/usr/bin/env python
# coding: utf-8

# # DataFrame Conversions
#
# This notebook demonstrates how to
# [convert](https://nexus-forge.readthedocs.io/en/latest/interaction.html#converting)
# a [Resource](https://nexus-forge.readthedocs.io/en/latest/interaction.html#resource)
# to a pandas DataFrame and vice versa.
#
# NOTE: this file is a script export of a notebook. Bare expressions (for
# example `forge.as_json(jane)` or `data`) are kept exactly as they were in
# the cells; when run as a plain script their values are evaluated but not
# displayed.

# ## Imports

import numpy as np
import pandas as pd
from kgforge.core import KnowledgeGraphForge, Resource

# ## Session
#
# A configuration file is needed in order to create a KnowledgeGraphForge
# session. A configuration can be generated using the notebook
# [00-Initialization.ipynb](00%20-%20Initialization.ipynb).

forge = KnowledgeGraphForge("../../configurations/forge.yml")

# ## List of Resources to DataFrame

# Two persons: Jane has a nested address and a placeholder email,
# John has a real email and no address.
address = Resource(
    type="PostalAddress",
    country="Switzerland",
    locality="Geneva",
)
jane = Resource(
    type="Person",
    name="Jane Doe",
    address=address,
    email="(missing)",
)
john = Resource(
    type="Person",
    name="John Smith",
    email="john.smith@epfl.ch",
)
persons = [jane, john]

forge.register(persons)

# Look at the JSON form of each resource and at the `_store_metadata`
# attribute of one of them after the `register` call.
forge.as_json(jane)
forge.as_json(john)
john._store_metadata

# Conversion with the default options.
forge.as_dataframe(persons)

# It is possible to specify what values (here '(missing)') should be
# replaced by `NaN` using the `na` parameter.
forge.as_dataframe(persons, na="(missing)")

# It is possible to specify a string to use in the column names to show
# nested values; the default is a dot `.`.
forge.as_dataframe(persons, nesting="__")

# The `expanded` parameter will show fields and values according to the
# JSON-LD context.
forge.as_dataframe(persons, expanded=True)

# Same conversion with the `store_metadata` flag switched on.
forge.as_dataframe(persons, store_metadata=True)

# ## DataFrame to list of Resources

# Nested address fields are spelled with the `.` separator in the column
# names; John's address columns are left as NaN.
data = pd.DataFrame(
    [
        {
            "type": "Person",
            "address.type": "PostalAddress",
            "address.country": "Switzerland",
            "address.locality": "Geneva",
            "email": "(missing)",
            "name": "Jane Doe",
        },
        {
            "type": "Person",
            "address.type": np.nan,
            "address.country": np.nan,
            "address.locality": np.nan,
            "email": "john.smith@epfl.ch",
            "name": "John Smith",
        },
    ]
)
data

resources = forge.from_dataframe(data)

# Rebuild the same persons from scratch (not the registered instances
# above) so the converted resources can be compared against them.
address = Resource(
    type="PostalAddress",
    country="Switzerland",
    locality="Geneva",
)
jane = Resource(
    type="Person",
    name="Jane Doe",
    address=address,
    email="(missing)",
)
john = Resource(
    type="Person",
    name="John Smith",
    email="john.smith@epfl.ch",
)
persons = [jane, john]

resources == persons

# Same conversion, this time passing '(missing)' as the `na` value; print
# the first resource of each result to compare them.
resources_na = forge.from_dataframe(data, na="(missing)")
print(resources[0])
print(resources_na[0])

# Explicitly pass `.` as the nesting separator used by the column names.
resources_nesting = forge.from_dataframe(data, nesting=".")
print(resources_nesting[0])

# Same table, but the nested address columns now use `_` as separator.
data = pd.DataFrame(
    [
        {
            "type": "Person",
            "address_type": "PostalAddress",
            "address_country": "Switzerland",
            "address_locality": "Geneva",
            "email": "(missing)",
            "name": "Jane Doe",
        },
        {
            "type": "Person",
            "address_type": np.nan,
            "address_country": np.nan,
            "address_locality": np.nan,
            "email": "john.smith@epfl.ch",
            "name": "John Smith",
        },
    ]
)

resources_nesting = forge.from_dataframe(data, nesting="_")
print(resources_nesting[0])