pyJedAI expects its input as a pandas.DataFrame. This notebook provides examples of reading data from several sources (CSV, JSON, Excel, RDF, SQL databases and SPARQL endpoints) and converting it to a DataFrame.
import pandas as pd
Example Dataset: CORA
# Load the CORA records from a pipe-delimited CSV file; the first line supplies
# the column names.
d1 = pd.read_csv("../data/der/cora/cora.csv", sep='|')
# header=None: the first line of cora_gt.csv is read as data, not as column
# names. `gt` is presumably the ground-truth file for the dataset -- confirm.
gt = pd.read_csv("../data/der/cora/cora_gt.csv", sep='|', header=None)
# Preview the first record. The rendered output shows a trailing
# 'Unnamed: 13' column whose value is NaN in this row.
d1.head(1)
Entity Id | address | author | editor | institution | month | note | pages | publisher | title | venue | volume | year | Unnamed: 13 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | los alamitos, ca: | p. auer, n. cesa-bianchi, y. freund, and r. e.... | NaN | NaN | NaN | NaN | pp. 322-331. | ieee computer society press, | 'gambling in a rigged casino: the adversarial ... | in proc. 36th annual symposium on foundations ... | NaN | 1995, | NaN |
# Load the CORA files from JSON instead of CSV. This rebinds `d1` and `gt`,
# replacing any frames loaded earlier under those names.
d1 = pd.read_json("../data/der/cora/cora.json")
gt = pd.read_json("../data/der/cora/cora_gt.json")
# Preview the first record; in the rendered output missing text fields appear
# as None rather than NaN.
d1.head(1)
Entity Id | address | author | editor | institution | month | note | pages | publisher | title | venue | volume | year | Unnamed: 13 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | los alamitos, ca: | p. auer, n. cesa-bianchi, y. freund, and r. e.... | None | None | None | None | pp. 322-331. | ieee computer society press, | 'gambling in a rigged casino: the adversarial ... | in proc. 36th annual symposium on foundations ... | None | 1995, | NaN |
# Load the CORA files from Excel workbooks. This rebinds `d1` and `gt` again.
d1 = pd.read_excel("../data/der/cora/cora.xlsx")
gt = pd.read_excel("../data/der/cora/cora_gt.xlsx")
# Preview the first record. The rendered output has an extra leading
# 'Unnamed: 0' column compared with the CSV/JSON versions.
# NOTE(review): this looks like a row index saved into the sheet; passing
# index_col=0 would presumably drop it -- confirm against the workbook.
d1.head(1)
Unnamed: 0 | Entity Id | address | author | editor | institution | month | note | pages | publisher | title | venue | volume | year | Unnamed: 13 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | los alamitos, ca: | p. auer, n. cesa-bianchi, y. freund, and r. e.... | NaN | NaN | NaN | NaN | pp. 322-331. | ieee computer society press, | 'gambling in a rigged casino: the adversarial ... | in proc. 36th annual symposium on foundations ... | NaN | 1995, | NaN |
import rdfpandas as rfd
import rdflib
# Parse each restaurant file into its own, freshly created rdflib Graph.
# No format argument is passed; presumably rdflib infers N-Triples from the
# '.nt' extension -- confirm for the installed rdflib version.
rdfd1 = rdflib.Graph().parse('../data/rdf/restaurants/restaurant1.nt')
rdfd2 = rdflib.Graph().parse('../data/rdf/restaurants/restaurant2.nt')
def rdf_to_df(graph_parsed) -> pd.DataFrame:
    """Convert an iterable of RDF triples into a three-column DataFrame.

    Args:
        graph_parsed: Any iterable yielding ``(subject, predicate, object)``
            3-tuples, for example a parsed ``rdflib.Graph``. It is consumed
            exactly once, so a generator is acceptable.

    Returns:
        A DataFrame with one row per triple and the columns ``subject``,
        ``predicate`` and ``object``, in that order. An empty input yields an
        empty frame that still carries the three columns.

    Raises:
        ValueError: If an item does not unpack into exactly three values.
    """
    # Explicit unpacking keeps the "exactly three values per item" check.
    triples = [(s, p, o) for s, p, o in graph_parsed]
    # One constructor call instead of filling an empty frame column by column.
    return pd.DataFrame(triples, columns=['subject', 'predicate', 'object'])
# Flatten both parsed graphs into subject/predicate/object DataFrames.
# This rebinds `d1` (previously a CORA frame) and introduces `d2`.
d1 = rdf_to_df(rdfd1)
d2 = rdf_to_df(rdfd2)
# Preview the first two triples of each frame.
d1.head(2)
d2.head(2)
from sqlite3 import connect
# Open an in-memory SQLite database (':memory:'), so nothing is written to disk.
conn = connect(':memory:')
# Write the current frames to tables named 'd1', 'd2' and 'gt'. If the cells
# ran top to bottom, d1/d2 are the RDF triple frames and gt is the frame read
# from Excel.
# NOTE(review): to_sql is called with default options, which presumably also
# stores the DataFrame index as a column -- confirm that is wanted here.
d1.to_sql('d1', conn)
d2.to_sql('d2', conn)
gt.to_sql('gt', conn)
# Read each table back into a new DataFrame using a plain SQL query.
sql_d1 = pd.read_sql('SELECT * FROM d1', conn)
sql_d2 = pd.read_sql('SELECT * FROM d2', conn)
sql_gt = pd.read_sql('SELECT * FROM gt', conn)
# Preview the first row that came back from the database.
sql_d1.head(1)
from sqlalchemy import create_engine
# Connection settings for the PostgreSQL example. The values below are
# placeholders -- replace them with your own before running the cell.
from urllib.parse import quote

POSTGRES_ADDRESS = 'db' ## INSERT YOUR DB ADDRESS
POSTGRES_PORT = '5439'
POSTGRES_USERNAME = 'username' ## CHANGE THIS TO YOUR POSTGRES USERNAME
POSTGRES_PASSWORD = 'root' ## CHANGE THIS TO YOUR POSTGRES PASSWORD
POSTGRES_DBNAME = 'database' ## CHANGE THIS TO YOUR DATABASE NAME
# Percent-encode the credentials before placing them in the URL: characters
# such as '@', ':', '/' or '%' in a username or password would otherwise be
# parsed as URL delimiters. safe='' makes quote() escape '/' as well. For the
# placeholder values the resulting string is unchanged.
postgres_str = (
    f'postgresql://{quote(POSTGRES_USERNAME, safe="")}'
    f':{quote(POSTGRES_PASSWORD, safe="")}'
    f'@{POSTGRES_ADDRESS}:{POSTGRES_PORT}/{POSTGRES_DBNAME}'
)
# Create the connection
cnx = create_engine(postgres_str)
# Read the whole 'd1' table from the PostgreSQL database into a DataFrame.
pd.read_sql('SELECT * FROM d1', cnx)
from pandas import json_normalize
from SPARQLWrapper import SPARQLWrapper, JSON
# Point the wrapper at the DBpedia SPARQL endpoint.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
# Query: find the resource whose English label is "Cristiano Ronaldo", follow
# dbo:birthPlace to places typed dbo:City, and return those places' labels.
# NOTE(review): the query uses the rdfs: and dbo: prefixes without PREFIX
# declarations; presumably the endpoint predefines them -- verify.
sparql.setQuery("""
SELECT *
WHERE
{
?athlete rdfs:label "Cristiano Ronaldo"@en ;
dbo:birthPlace ?place .
?place a dbo:City ;
rdfs:label ?cityName .
}
""")
# Ask for JSON so the converted result can be indexed by key below.
sparql.setReturnFormat(JSON)
results = sparql.query().convert()
# Flatten the entries under results -> bindings into a DataFrame.
# This rebinds `d1`, replacing the frame previously held under that name.
d1 = json_normalize(results["results"]["bindings"])
d1