#!/usr/bin/env python
# coding: utf-8
# In[2]:
from IPython.core.display import HTML
# Build an HTML display object from a string that contains only a newline.
# NOTE(review): the payload is empty apart from that newline - any markup or
# styling that was meant to be injected here appears to be missing; confirm
# against the original notebook.
HTML("""
""")
# # Collections Data to Linked Art - Cleveland Museum of Art
#
# This Jupyter notebook transforms collection data from the Cleveland Museum of Art to a Linked Art representation in a JSON-LD format.
#
# ## Collection Data
#
# The input data file is available from the Cleveland Museum of Art's GitHub repository:
# https://github.com/ClevelandMuseumArt/openaccess
#
# ## Transformation Process
#
# 1. Read CSV file
# 2. Convert CSV file to Python Dictionary
# 3. Create a field mapping between the CSV file fields and the Linked Art data model
# 4. Create an object property dictionary
# In[2]:
# Third-party packages are installed on demand when the import fails.
# Only ImportError triggers the install, so unrelated failures (for example a
# KeyboardInterrupt or an error raised inside the package) are not masked.
# NOTE(review): get_ipython() is not defined in this file; presumably it is
# provided by the IPython/Jupyter session the notebook runs in - confirm.
try:
    import IPython
except ImportError:
    get_ipython().run_line_magic('pip', 'install IPython')
    import IPython
from IPython.display import display,IFrame,HTML, Javascript

# json, csv and os ship with Python itself, so they are imported directly;
# there is nothing to install for them.
import json
import csv
import os

try:
    import cromulent
except ImportError:
    get_ipython().run_line_magic('pip', 'install cromulent')
    import cromulent
from cromulent.model import factory
import lib.linkedart as la

try:
    import pandas as pd
except ImportError:
    get_ipython().run_line_magic('pip', 'install pandas')
    import pandas as pd
# ### Parse Collection Data CSV File
# In[3]:
# Location of the collection CSV; the later cells read the same path.
file = "./data/cma/input/data.csv"

# Load the CSV into a DataFrame and preview its first rows.
mpg = pd.read_csv(
    file,
    low_memory=False,
)
mpg.head()
# ### Read Collection Data CSV file into Python Dictionary
#
# - Remove Byte Order Mark (BOM) from CSV file
# - Use `csv.DictReader()` to create an object that operates like a regular reader but maps the information in each row to a dict whose keys are given by the optional fieldnames parameter.
#
#
# In[4]:
# remove BOM see - https://stackoverflow.com/questions/8898294/convert-utf-8-with-bom-to-utf-8-with-no-bom-in-python
# Re-encode the file in place: it is read as 'utf-8-sig' and written back as
# plain 'utf-8'. Context managers make sure both handles are closed even if
# the read or write fails.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_target:
    bom_target.write(s)

# open file and read into Python dictionary
# The handle is deliberately left open: allObjects is a lazy reader that is
# iterated again by the conversion loop later in this file.
allObjects = csv.DictReader(open(file, mode='r', encoding='utf-8'))

display(HTML("\nExample Record in Python Dictionary\n"))

# Print only the first record as an example.
# NOTE: this consumes that record from allObjects, so any later iteration
# over allObjects starts at the second row of the CSV.
for obj in allObjects:
    print(json.dumps(obj, indent=2))
    break
# #### Further Reading
#
# - Python CSV https://docs.python.org/3/library/csv.html
# - Byte Order Mark https://en.wikipedia.org/wiki/Byte_order_mark
# ### Create field mapping
# In[5]:
# Field mapping consumed by createObjProp(): each key is the property name
# used in the object property dictionary, each value is the CSV column whose
# content is copied into that property.
# NOTE(review): an empty string presumably marks a property with no
# corresponding CSV column - confirm.
mapp = {
    # identifiers and classification
    "id": "id",
    "accession_number": "accession_number",
    "accession_date": "",
    "classification": "type",
    # titles and notes
    "title": "title",
    "alt_title": "title_in_original_language",
    "notes": "tombstone",
    # creation
    "date_created": "creation_date",
    "date_created_earliest": "creation_date_earliest",
    "date_created_latest": "creation_date_latest",
    "created_period": "culture",
    "created_dynasty": "",
    "created_inscriptions": "inscriptions",
    "created_notes": "fun_fact",
    "creator": "creators",
    # physical characteristics
    "physical_medium": "Medium",
    "physical_style": "",
    "physical_technique": "technique",
    "physical_description": "",
    "physical_dimensions": "measurements",
    # provenance, ownership and location
    "created_provenance": "provenance",
    "credit_line": "creditline",
    "collection": "department",
    # both properties below read the same CSV column
    "current_status": "current_location",
    "current_location": "current_location",
    "homepage": "url",
}
# ### Create object property dictionary
# In[6]:
def createObjProp(obj, mapp):
    """Build the object property dictionary for one CSV record.

    Every column of ``obj`` whose name appears as a value in ``mapp`` is
    copied to the corresponding ``mapp`` key (one column may feed several
    properties). The ``"creator"`` property is wrapped as
    ``{"name": <value>}``; all other properties receive the raw value.
    A fixed ``"current_owner"`` entry is always added last.

    Args:
        obj: One record, mapping CSV column name to cell value.
        mapp: Field mapping, property name -> CSV column name.

    Returns:
        A new dict of property name -> value, ordered by the columns of
        ``obj`` (and, within one column, by the order of ``mapp``).
    """
    # Index the mapping by CSV column once, so each column is resolved with a
    # single lookup instead of a scan over the whole mapping.
    props_by_column = {}
    for prop, column in mapp.items():
        props_by_column.setdefault(column, []).append(prop)

    objProp = {}
    for key, value in obj.items():
        for prop in props_by_column.get(key, ()):
            if prop == "creator":
                objProp[prop] = {"name": value}
            else:
                objProp[prop] = value

    # The owner is the same for every record, so it is set here directly.
    objProp["current_owner"] = {
        "name": "Cleveland Museum of Art",
        "location": "Cleveland,Ohio",
        "type": "http://vocab.getty.edu/aat/300312281",
        "type_label": "",
    }
    return objProp
# baseURI for JSON-LD document
baseURI = "https://clevelandart.org/art/"

# Directory that receives one JSON-LD document per converted object; it is
# created up front so the first write does not fail on a fresh checkout.
output_dir = "./data/cma/output/json/all/"
os.makedirs(output_dir, exist_ok=True)

for index,obj in enumerate(allObjects):
    # Only the first five records still available from the reader are converted.
    if index >= 5:
        break
    objProp = createObjProp(obj,mapp)
    # NOTE: `id` shadows the builtin, but the name is kept because the
    # visualisation code later in this file reads it after the loop.
    id = obj[mapp.get("id")]
    object_uri = baseURI + id
    objLA = la.createObjDesc(objProp,la.objTypes,object_uri)
    # write to file
    # The context manager closes the file even if serialisation raises, and
    # the explicit encoding keeps the output independent of the platform
    # default.
    with open(output_dir + id + ".json", "wt", encoding="utf-8") as text_file:
        text_file.write(factory.toString(objLA, compact=False))
# In[7]:
from IPython.display import display,HTML,Javascript
# Heading for the visualisation section. The line break that used to split
# the literal (a syntax error) is now written as an escape sequence.
# NOTE(review): the JavaScript further down targets the selector '#example',
# but no element with that id is present in this string - the markup that
# defined it appears to be missing; confirm against the original notebook.
HTML('Data Visualisation\n')
# In[8]:
from IPython.core.display import Javascript
# JavaScript appended after the library source: it points `file` at the JSON
# document of the last converted object (`id` is left over from the
# conversion loop) and calls visjsonld() on the '#example' selector.
code2 = (
    "var file = './data/cma/output/json/all/" + id + ".json';"
    "var selector = '#example';"
    "visjsonld(file, selector); "
)

# Read the visualisation library and append the call above; the file is
# closed as soon as its content has been read.
with open('src/js/visld.js', 'r') as _jscript:
    code = _jscript.read() + code2

Javascript(code)
# In[9]:
from IPython.core.display import Javascript, HTML
def fn(fpath): # 1.Get file names from directory
    """Display the name of every entry in the directory *fpath*.

    Each name returned by ``os.listdir`` is wrapped in an ``HTML`` object
    and passed to ``display``, in the order ``os.listdir`` yields them.

    Args:
        fpath: Path of the directory to list.

    Returns:
        None.
    """
    # NOTE(review): the surrounding heading text speaks of clickable links,
    # but only the bare file name is emitted here - the anchor markup appears
    # to be missing; confirm against the original notebook.
    for name in os.listdir(fpath):
        display(HTML(name))
# Heading shown above the file listing. The line breaks that used to split
# the literal (a syntax error) are now written as escape sequences.
display(HTML("File list\nClick on a link to view created Linked Art JSON-LD file\n"))

# List every generated JSON-LD document.
fn("./data/cma/output/json/all/")
# In[ ]:
# In[ ]: