#!/usr/bin/env python
# coding: utf-8

# In[1]:

# IPython supplies the rich-display helpers (display, HTML, Javascript, ...)
# used by the cells of this notebook.
# NOTE(review): the install fallback calls get_ipython(), which is only
# available when this code already runs inside an IPython/Jupyter session.
try:
    import IPython
except ImportError:
    get_ipython().run_line_magic('pip', 'install IPython')
    import IPython

from IPython.display import display, IFrame, HTML, Javascript

# Placeholder for notebook-level HTML; the markup is currently empty.
HTML("""""")

# # Transforming Collections Data to Linked Art
#
# National Gallery of Art

# ## Input Data
#
# The collection data is held in two files:
# - CSV data file containing artwork descriptions [data file](./data/nga/input/objects.csv)
# - CSV data file containing detailed digital image information for artworks https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/published_images.csv
#
#
# #### Further Reading
#
# - National Gallery of Art https://www.nga.gov/
# - NGA GitHub https://github.com/NationalGalleryOfArt
# - The input data file is from https://github.com/NationalGalleryOfArt/opendata/tree/main/data

# In[2]:

# ### Load NGA Collection Data into DataFrame

# In[3]:

# Path of the artwork description CSV; later cells re-open this same file.
file = './data/nga/input/objects.csv'

try:
    import pandas as pd
except ImportError:
    get_ipython().system('pip install pandas')
    import pandas as pd

# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, which avoids mixed-type column inference on this wide CSV.
mpg = pd.read_csv(file, low_memory=False)
mpg.head()

# ### Load NGA Digital Image File into DataFrame
#
# The data file containing detailed digital image data is loaded into a pandas dataframe `df_images`

# In[4]:

# Published image metadata is read straight from the NGA open-data repository.
file_images = "https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/published_images.csv"
df_images = pd.read_csv(file_images)
df_images.head()

# ### Remove Byte Order Marks and Define Data Mapping
#
# Remove Byte Order Marks and create Python dictionary containing data mapping for each input file.
# In[5]:

import csv
import json  # standard library: always importable, no install fallback needed

# Remove BOM: decoding with 'utf-8-sig' drops a leading byte order mark if one
# is present, and the file is then written back as plain UTF-8 so the first
# CSV column name does not carry a '\ufeff' prefix when read later.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_target:
    bom_target.write(s)

# Preview the first record as JSON. A dedicated reader is used for this so the
# preview does not consume a row from `allObjects`; previewing through the
# shared reader made the later transformation loop start at the second record.
with open(file, mode='r', encoding='utf-8') as preview_file:
    for obj in csv.DictReader(preview_file):
        print(json.dumps(obj, indent=2))
        break

# Lazy row iterator consumed by the transformation cell further down. The
# underlying file handle is deliberately left open until that cell has read it.
allObjects = csv.DictReader(open(file, mode='r', encoding='utf-8'))

# ### Transform to JSON-LD
#
# This next step uses the following to transform the collections data to Linked Art JSON-LD
# - the data mapping
# - custom coding in createObjProp()
# - cromulent Python library
# - custom coding in la including createObjDesc()
#
# The URLs for the artwork digital images are in a separate file. Custom coding in the transformation loop matches the rows of the two collection data files on the object id to extract the digital image url.
#
#
#     matchImages = df_images.query('depictstmsobjectid == ' + objProp["id"] )
#     objProp["image_url"] = matchImages["iiifurl"].iloc[0]  + "/full/!500,500/0/default.jpg"
# 
# # # Additional custom code creates a web page URL for the artwork: # #
# objProp["homepage"] = "https://www.nga.gov/collection/art-object-page." + id + ".html"   
# 
# In[6]:

# Data mapping: each key is a property name used when building the Linked Art
# description, each value is the NGA CSV column that supplies it. An empty
# string means no CSV column is mapped to that property.
mapp = {
    "id": "objectid",
    "accession_number": "accessionnum",
    "accession_date": "",
    "classification": "classification",
    "title": "title",
    "alt_title": "",
    "notes": "",
    "date_created": "displaydate",
    "date_created_earliest": "beginyear",
    "date_created_latest": "endyear",
    "created_period": "",
    "created_dynasty": "",
    "created_inscriptions": "",
    "created_notes": "",
    "creator": "attribution",
    "physical_medium": "medium",
    "physical_style": "",
    "physical_technique": "",
    "physical_description": "",
    "physical_dimensions": "dimensions",
    "created_provenance": "",
    "credit_line": "creditline",
    "collection": "departmentabbr",
    "current_status": "",
    "current_owner": "",
    "image_url": "",
    "homepage": "",
}

# display transposed dataframe of data mapping
display(pd.DataFrame(mapp, index=[0]).T)

# In[7]:

# baseURI for JSON-LD document
baseURI = "https://www.nga.gov/collection/"


def createObjProp(obj, mapp, baseURI):
    """Build the property dictionary for one artwork row.

    Args:
        obj: One CSV record as a mapping of column name to value.
        mapp: Mapping of property name to the CSV column that supplies it.
        baseURI: Prefix used to build the creator identifier.

    Returns:
        A dict holding every mapped property whose CSV column is present in
        ``obj``, plus fixed ``homepage`` and ``current_owner`` entries. The
        ``creator`` property is wrapped in a one-element list of dicts with
        ``id``, ``name`` and ``role`` keys.

    Raises:
        KeyError: If the creator column is present in ``obj`` but the column
            mapped to ``"id"`` is not.
    """
    # Invert the mapping once (column -> properties fed by that column) so
    # each CSV column is resolved with a single dictionary lookup.
    props_by_column = {}
    for prop, column in mapp.items():
        props_by_column.setdefault(column, []).append(prop)

    objProp = {}
    # Walk the record in CSV column order, which keeps the insertion order of
    # objProp the same as a column-by-column scan of the mapping.
    for column, value in obj.items():
        for prop in props_by_column.get(column, ()):
            if prop == "creator":
                # The creator identifier is derived from the object id.
                objProp[prop] = [{
                    "id": baseURI + "creatorid/" + obj[mapp["id"]],
                    "name": value,
                    "role": "Artist",
                }]
            else:
                objProp[prop] = value

    # NOTE(review): the notebook text describes building an
    # "art-object-page.<id>.html" URL here, but the value is left empty.
    # Confirm which behaviour is intended before changing it.
    objProp["homepage"] = ""
    objProp["current_owner"] = {
        "name": "National Gallery of Art",
        "location": "Washington, D.C., United States",
        "type": "http://vocab.getty.edu/aat/300312281",
        "type_label": "",
    }
    return objProp


# In[8]:

import os
from itertools import islice

from lib import linkedart as la

try:
    import cromulent
except ImportError:
    get_ipython().system('pip install cromulent')
    import cromulent

from cromulent.model import factory

outputdir = "./data/nga/output/json/all/"
# Create the output directory up front so the first write cannot fail on a
# missing folder.
os.makedirs(outputdir, exist_ok=True)

# list to hold file names for use with jsonld visualisation dropdown
selectOptions = [('Please select an artwork', '')]

# Build an object-id -> IIIF base URL lookup once, instead of scanning the
# image table for every artwork. setdefault keeps the first image row listed
# for an object, matching a "first matching row" selection.
dfimg_list = df_images['depictstmsobjectid'].tolist()
dfimgurl_list = df_images['iiifurl'].tolist()
image_url_by_object_id = {}
for tms_object_id, iiif_url in zip(dfimg_list, dfimgurl_list):
    image_url_by_object_id.setdefault(tms_object_id, iiif_url)

# Only the first MAX_OBJECTS records are transformed.
MAX_OBJECTS = 100

for obj in islice(allObjects, MAX_OBJECTS):
    # create object property dictionary
    objProp = createObjProp(obj, mapp, baseURI)
    obj_id = objProp["id"]
    object_uri = baseURI + obj_id

    # Attach a 500x500-bounded IIIF rendition when the artwork has an image.
    image_key = int(obj_id)
    if image_key in image_url_by_object_id:
        objProp["image_url"] = image_url_by_object_id[image_key] + "/full/!500,500/0/default.jpg"

    filename = obj_id + ".json"
    selectOptions.append((objProp["title"] + " (" + filename + ")", filename))

    # create obj description
    objLA = la.createObjDesc(objProp, la.objTypes, object_uri)

    # write to file; the context manager closes the handle even on error
    with open(outputdir + filename, "wt", encoding="utf-8") as text_file:
        text_file.write(factory.toString(objLA, compact=False))

# ### Explore the Linked Art JSON-LD files
#
# Select an artwork from the dropdown to view
# - the artwork image
# - a visualisation of the Linked Art JSON-LD representation created above

# In[9]:

try:
    import ipywidgets
except ImportError:
    get_ipython().run_line_magic('pip', 'install ipywidgets')
    import ipywidgets

from ipywidgets import Layout, FileUpload
from IPython.display import display, IFrame, HTML, Image
from IPython.core.display import Javascript
import os
import json  # standard library: always importable, no install fallback needed


def _show_artwork_image(src):
    """Point the 'artworknga' page element at ``src`` ('' clears it)."""
    # json.dumps yields a correctly quoted and escaped JavaScript string
    # literal, so quotes inside the URL cannot break the generated script.
    display(Javascript(
        "document.getElementById('artworknga').src = " + json.dumps(src) + ";"
    ))


def dropdown_eventhandler(change):
    """Render the JSON-LD visualisation and image for the selected artwork.

    Args:
        change: ipywidgets change notification; ``change.new`` is the file
            name of the selected JSON-LD document, or '' for the placeholder.
    """
    selected = change.new
    if not selected:
        # Placeholder entry chosen: there is no file to open, so only clear
        # the image instead of trying to read the output directory.
        _show_artwork_image("")
        return

    # The visualisation script is re-sent on every selection, followed by a
    # call to visjsonld (presumably defined in visld.js) for the chosen file.
    # NOTE(review): assumes the page contains '#visnga' and 'artworknga'
    # elements - confirm against the notebook's HTML cell.
    with open('./src/js/visld.js', 'r') as _jscript:
        viewer_source = _jscript.read()
    code = (
        viewer_source
        + "var file = " + json.dumps(outputdir + selected) + ";"
        + "var selector = '#visnga';visjsonld(file, selector); "
    )
    display(Javascript(code))

    with open(os.path.join(outputdir, selected), encoding="utf-8") as json_file:
        artwork = json.load(json_file)

    # Show the first representation when present; otherwise clear the image.
    if "representation" in artwork and artwork["representation"]:
        _show_artwork_image(artwork["representation"][0]["id"])
    else:
        _show_artwork_image("")


selectObject = ipywidgets.Dropdown(options=selectOptions)
selectObject.observe(dropdown_eventhandler, names='value')
display(selectObject)

#
# #
# In[ ]: