#!/usr/bin/env python
# coding: utf-8

# In[2]:

from IPython.display import HTML

# NOTE(review): this literal only contains whitespace; any markup/CSS it may
# have carried seems to have been lost when the notebook was exported.
HTML(""" """)


# # Collections Data to Linked Art - Cleveland Museum of Art
#
# This Jupyter notebook transforms collection data from the Cleveland Museum
# of Art into a Linked Art representation in JSON-LD format.
#
# ## Collection Data
#
# The input data file is available from the Cleveland Museum of Art's GitHub
# repository:
# https://github.com/ClevelandMuseumArt/openaccess
#
# ## Transformation Process
#
# 1. Read CSV file
# 2. Convert CSV file to Python Dictionary
# 3. Create a field mapping between the CSV file fields and the Linked Art data model
# 4. Create an object property dictionary

# In[2]:

# Standard-library modules are always available, so they are imported directly
# (there is nothing to "pip install" for json/csv/os).
import csv
import json
import os

import IPython
from IPython.display import display, IFrame, HTML, Javascript

# Third-party packages may be missing from a fresh kernel; install on demand.
# get_ipython() only exists when this code runs inside IPython/Jupyter.
try:
    import cromulent
except ImportError:
    get_ipython().run_line_magic('pip', 'install cromulent')
    import cromulent
from cromulent.model import factory

import lib.linkedart as la

try:
    import pandas as pd
except ImportError:
    get_ipython().run_line_magic('pip', 'install pandas')
    import pandas as pd


# ### Parse Collection Data CSV File

# In[3]:

file = './data/cma/input/data.csv'
mpg = pd.read_csv(file, low_memory=False)
mpg.head()


# ### Read Collection Data CSV file into Python Dictionary
#
# - Remove Byte Order Mark (BOM) from CSV file
# - Use `csv.DictReader()` to create an object that operates like a regular
#   reader but maps the information in each row to a dict whose keys are given
#   by the optional fieldnames parameter.

# In[4]:

# remove BOM see - https://stackoverflow.com/questions/8898294/convert-utf-8-with-bom-to-utf-8-with-no-bom-in-python
# 'utf-8-sig' drops a leading BOM while decoding; the file is then rewritten
# as plain UTF-8. Context managers make sure both handles are closed.
with open(file, mode='r', encoding='utf-8-sig') as bom_in:
    s = bom_in.read()
with open(file, mode='w', encoding='utf-8') as bom_out:
    bom_out.write(s)

# open file and read into Python dictionary
# The rows are materialised into a list so that peeking at the first record
# below does not consume it: a bare DictReader is a one-shot iterator, which
# previously caused the transformation loop to skip the first record.
with open(file, mode='r', encoding='utf-8') as csv_in:
    allObjects = list(csv.DictReader(csv_in))

# NOTE(review): the HTML literals in this script contain only text and
# newlines; their original markup appears to have been stripped - confirm
# against the source notebook.
display(HTML("\n\nExample Record in Python Dictionary\n\n"))

if allObjects:
    print(json.dumps(allObjects[0], indent=2))


# #### Further Reading
#
# - Python CSV https://docs.python.org/3/library/csv.html
# - Byte Order Mark https://en.wikipedia.org/wiki/Byte_order_mark

# ### Create field mapping

# In[5]:

# Maps an object property name (key) to the CSV column it is read from
# (value). An empty string means no CSV column is mapped to that property.
mapp = {
    "id": "id",
    "accession_number": "accession_number",
    "accession_date": "",
    "classification": "type",
    "title": "title",
    "alt_title": "title_in_original_language",
    "notes": "tombstone",
    "date_created": "creation_date",
    "date_created_earliest": "creation_date_earliest",
    "date_created_latest": "creation_date_latest",
    "created_period": "culture",
    "created_dynasty": "",
    "created_inscriptions": "inscriptions",
    "created_notes": "fun_fact",
    "creator": "creators",
    "physical_medium": "Medium",
    "physical_style": "",
    "physical_technique": "technique",
    "physical_description": "",
    "physical_dimensions": "measurements",
    "created_provenance": "provenance",
    "credit_line": "creditline",
    "collection": "department",
    "current_status": "current_location",
    "current_location": "current_location",
    "homepage": "url",
}


# ### Create object property dictionary

# In[6]:

def createObjProp(obj, mapp):
    """Build the object property dictionary for one CSV record.

    Args:
        obj: One CSV row as a dict of column name -> value.
        mapp: Field mapping of property name -> CSV column name.

    Returns:
        A dict holding, for every property whose mapped column is present in
        ``obj``, that column's value (the ``creator`` value is wrapped as
        ``{"name": value}``), plus a fixed ``current_owner`` entry.
    """
    objProp = {}
    # Columns form the outer loop so one column can feed several properties
    # (e.g. "current_location") and insertion order follows the CSV columns.
    for column, value in obj.items():
        for prop, source_column in mapp.items():
            if source_column != column:
                continue
            objProp[prop] = {"name": value} if prop == "creator" else value

    objProp["current_owner"] = {
        "name": "Cleveland Museum of Art",
        "location": "Cleveland,Ohio",
        "type": "http://vocab.getty.edu/aat/300312281",
        "type_label": "",
    }
    return objProp


# baseURI for JSON-LD document
baseURI = "https://clevelandart.org/art/"

output_dir = "./data/cma/output/json/all/"
# Make sure the target directory exists before the first write.
os.makedirs(output_dir, exist_ok=True)

# Identifier of the most recently written record; the visualisation cell
# below uses it. Stays None when the CSV contained no rows.
object_id = None

# Only the first five records are transformed.
for obj in allObjects[:5]:
    objProp = createObjProp(obj, mapp)
    object_id = obj[mapp["id"]]
    object_uri = baseURI + object_id
    objLA = la.createObjDesc(objProp, la.objTypes, object_uri)

    # write to file
    with open(output_dir + object_id + ".json", "wt", encoding="utf-8") as text_file:
        text_file.write(factory.toString(objLA, compact=False))


# In[7]:

# NOTE(review): the script below targets the selector '#example'; presumably
# this literal originally carried an element with that id - confirm.
HTML('\n\nData Visualisation\n\n')


# In[8]:

if object_id is None:
    raise RuntimeError("No records were read from " + file + "; nothing to visualise")

code2 = (
    "var file = '" + output_dir + object_id + ".json';"
    "var selector = '#example';"
    "visjsonld(file, selector); "
)

with open('src/js/visld.js', 'r', encoding='utf-8') as _jscript:
    code = _jscript.read() + code2

Javascript(code)


# In[9]:

def fn(fpath):
    """Display the name of every entry found in directory ``fpath``.

    Args:
        fpath: Path of the directory to list.
    """
    # Sorted so the listing is stable; os.listdir returns arbitrary order.
    for name in sorted(os.listdir(fpath)):
        # NOTE(review): the heading below mentions links, so this was
        # presumably an anchor tag whose markup was lost - confirm.
        display(HTML("" + name + ""))


display(HTML("\n\nFile list\n\nClick on a link to view created Linked Art JSON-LD file\n\n"))

fn("./data/cma/output/json/all/")


# In[ ]:


# In[ ]: