try:
import IPython
except:
%pip install IPython
import IPython
from IPython.display import display, IFrame, HTML, Javascript
HTML("""<link rel="stylesheet" type="text/css" href="src/css/notebook.css"/>""")
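The collection data is split across two files: one describing the objects (`objects.csv`) and one describing their published digital images (`published_images.csv`).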
### Load NGA Collection Data into DataFrame
file = './data/nga/input/objects.csv'
try:
import pandas as pd
except:
!pip install pandas
import pandas as pd
mpg = pd.read_csv(file,low_memory=False)
mpg.head()
objectid | accessioned | accessionnum | locationid | title | displaydate | beginyear | endyear | visualbrowsertimespan | medium | ... | visualbrowserclassification | parentid | isvirtual | departmentabbr | portfolio | series | volume | watermarks | lastdetectedmodification | customprinturl | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 113260 | 1 | 2000.127.20.1-193 | NaN | Lithographs, Volume 9 | NaN | 1804.0 | 1866.0 | 1801 to 1825 | book of lithographs | ... | volume | NaN | 0 | CG-E | NaN | NaN | NaN | NaN | 2020-05-06 22:01:32.06-04 | NaN |
1 | 113833 | 1 | 2000.127.3.1-172 | NaN | Lithographs, Volume 12 | NaN | 1804.0 | 1866.0 | 1801 to 1825 | book of lithographs | ... | volume | NaN | 0 | CG-E | NaN | NaN | NaN | NaN | 2020-05-06 22:01:32.06-04 | NaN |
2 | 114640 | 1 | 2000.127.8.1-110 | NaN | Lithographs, Volume 17 | NaN | 1804.0 | 1866.0 | 1801 to 1825 | book of lithographs | ... | volume | NaN | 0 | CG-E | NaN | NaN | NaN | NaN | 2020-05-06 22:01:32.06-04 | NaN |
3 | 114855 | 1 | 2000.127.10.1-28 | NaN | Lithographs, Volume 19 | NaN | 1804.0 | 1866.0 | 1801 to 1825 | book of lithographs | ... | volume | NaN | 0 | CG-E | NaN | NaN | NaN | NaN | 2020-05-06 22:01:32.06-04 | NaN |
4 | 119191 | 1 | 2001.100.2.b | NaN | Studies of Lago Maggiore and and the Entrance ... | c. 1700 | 1700.0 | 1700.0 | 1651 to 1700 | brown ink over graphite on laid paper | ... | drawing | 119190.0 | 0 | CG-E | NaN | NaN | NaN | NaN | 2019-10-28 22:01:34.883-04 | NaN |
5 rows × 28 columns
The data file containing detailed digital image data is loaded into a pandas DataFrame named `df_images`.
# Published-images table from the National Gallery of Art open-data
# repository; each row carries the IIIF URLs of one digital image.
file_images = "https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/published_images.csv"

# pandas fetches the CSV straight from the URL.
df_images = pd.read_csv(filepath_or_buffer=file_images)

# Last expression of the cell: show the first rows as a preview.
df_images.head()
uuid | iiifurl | iiifthumburl | viewtype | sequence | width | height | maxpixels | created | modified | depictstmsobjectid | assistivetext | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 00004dec-8300-4487-8d89-562d0126b6a1 | https://api.nga.gov/iiif/00004dec-8300-4487-8d... | https://api.nga.gov/iiif/00004dec-8300-4487-8d... | primary | 0.0 | 2623 | 4000 | 640.0 | 2010-09-07 15:08:48-04 | 2022-04-21 12:57:43.657-04 | 11975 | NaN |
1 | 00007f61-4922-417b-8f27-893ea328206c | https://api.nga.gov/iiif/00007f61-4922-417b-8f... | https://api.nga.gov/iiif/00007f61-4922-417b-8f... | primary | 0.0 | 3365 | 4332 | NaN | 2013-07-05 15:41:08-04 | 2022-05-23 14:59:28-04 | 17387 | NaN |
2 | 0000bd8c-39de-4453-b55d-5e28a9beed38 | https://api.nga.gov/iiif/0000bd8c-39de-4453-b5... | https://api.nga.gov/iiif/0000bd8c-39de-4453-b5... | primary | 0.0 | 3500 | 4688 | NaN | 2013-08-05 14:31:59-04 | 2022-05-23 15:05:58-04 | 19245 | NaN |
3 | 0000e5a4-7d32-4c2a-97c6-a6b571c9fd71 | https://api.nga.gov/iiif/0000e5a4-7d32-4c2a-97... | https://api.nga.gov/iiif/0000e5a4-7d32-4c2a-97... | primary | 0.0 | 2252 | 3000 | NaN | 2013-03-18 14:39:55-04 | 2022-05-17 18:19:25-04 | 153987 | NaN |
4 | 0001668a-dd1c-48e8-9267-b6d1697d43c8 | https://api.nga.gov/iiif/0001668a-dd1c-48e8-92... | https://api.nga.gov/iiif/0001668a-dd1c-48e8-92... | primary | 0.0 | 3446 | 4448 | NaN | 2014-01-02 14:50:50-05 | 2022-05-23 15:39:38-04 | 23830 | NaN |
Remove any byte order mark (BOM) from the objects file, then read it with `csv.DictReader` so that each row becomes a Python dictionary keyed by column name.
import csv
import json  # standard library, so no install fallback is needed

# Remove a leading byte order mark, if there is one: decoding with
# 'utf-8-sig' drops it, and the text is written back as plain UTF-8.
# Context managers make sure both handles are flushed and closed.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_free:
    bom_free.write(s)

# Preview the first record through a throwaway reader. Previewing through
# `allObjects` itself would consume that row, and the later cell that iterates
# `allObjects` would then silently skip the first object.
with open(file, mode='r', encoding='utf-8', newline='') as preview_file:
    first_row = next(csv.DictReader(preview_file), None)
if first_row is not None:
    print(json.dumps(first_row, indent=2))

# Reader used by the transformation cell further down. The file handle is
# deliberately left open because the reader pulls rows from it lazily.
# newline='' is what the csv module expects for files it parses itself.
allObjects = csv.DictReader(open(file, mode='r', encoding='utf-8', newline=''))
{ "objectid": "113260", "accessioned": "1", "accessionnum": "2000.127.20.1-193", "locationid": "", "title": "Lithographs, Volume 9", "displaydate": "", "beginyear": "1804", "endyear": "1866", "visualbrowsertimespan": "1801 to 1825", "medium": "book of lithographs", "dimensions": "", "inscription": "", "markings": "", "attributioninverted": "Gavarni, Paul", "attribution": "Paul Gavarni", "creditline": "Ailsa Mellon Bruce Fund", "classification": "Volume", "subclassification": "", "visualbrowserclassification": "volume", "parentid": "", "isvirtual": "0", "departmentabbr": "CG-E", "portfolio": "", "series": "", "volume": "", "watermarks": "", "lastdetectedmodification": "2020-05-06 22:01:32.06-04", "customprinturl": "" }
This next step uses the following to transform the collections data to Linked Art JSON-LD
The URLs for the artwork digital images are held in a separate file. Custom code run alongside `createObjProp()` matches each row of the objects file, by object id,
to its rows in the images file in order to extract the digital image URL.
matchImages = df_images.query('depictstmsobjectid == ' + objProp["id"])
objProp["image_url"] = matchImages["iiifurl"].iloc[0] + "/full/!500,500/0/default.jpg"
Additional custom code creates a web page URL for the artwork:
objProp["homepage"] = "https://www.nga.gov/collection/art-object-page." + id + ".html"
# Mapping from the property names used when building the Linked Art
# description (keys) to the column names of the objects CSV (values).
# An empty string marks a property that has no source column here.
mapp = {
    # identification
    "id": "objectid",
    "accession_number": "accessionnum",
    "accession_date": "",
    "classification": "classification",
    "title": "title",
    "alt_title": "",
    "notes": "",
    # creation
    "date_created": "displaydate",
    "date_created_earliest": "beginyear",
    "date_created_latest": "endyear",
    "created_period": "",
    "created_dynasty": "",
    "created_inscriptions": "",
    "created_notes": "",
    "creator": "attribution",
    # physical description
    "physical_medium": "medium",
    "physical_style": "",
    "physical_technique": "",
    "physical_description": "",
    "physical_dimensions": "dimensions",
    # ownership and status
    "created_provenance": "",
    "credit_line": "creditline",
    "collection": "departmentabbr",
    "current_status": "",
    "current_owner": "",
    # web resources
    "image_url": "",
    "homepage": "",
}

# Show the mapping as a one-column table: a single-row frame is built from
# the dict and then transposed so that each property is a row.
mapping_table = pd.DataFrame(mapp, index=[0]).transpose()
display(mapping_table)
0 | |
---|---|
id | objectid |
accession_number | accessionnum |
accession_date | |
classification | classification |
title | title |
alt_title | |
notes | |
date_created | displaydate |
date_created_earliest | beginyear |
date_created_latest | endyear |
created_period | |
created_dynasty | |
created_inscriptions | |
created_notes | |
creator | attribution |
physical_medium | medium |
physical_style | |
physical_technique | |
physical_description | |
physical_dimensions | dimensions |
created_provenance | |
credit_line | creditline |
collection | departmentabbr |
current_status | |
current_owner | |
image_url | |
homepage |
# Base URI under which the identifiers in the JSON-LD documents are minted.
baseURI = "https://www.nga.gov/collection/"


def createObjProp(obj,mapp,baseURI):
    """Build the property dictionary for one CSV record.

    Args:
        obj: one row of the objects CSV, as a dict of column name -> value.
        mapp: dict of property name -> CSV column name.
        baseURI: prefix used to build the creator identifier.

    Returns:
        A dict holding every mapped property whose source column is present
        in ``obj``, plus ``homepage`` and ``current_owner``. The ``creator``
        property is wrapped in a one-element list of
        ``{"id", "name", "role"}`` dicts; all other properties carry the raw
        column value.
    """
    objProp = {}

    # Walk the CSV columns in their own order so the properties are inserted
    # in column order, and look up every property fed by each column.
    for column in list(obj.keys()):
        matching_props = [name for name in mapp if column == mapp[name]]
        for prop_name in matching_props:
            if prop_name != "creator":
                objProp[prop_name] = obj[column]
                continue
            # The creator identifier is derived from the object's own id.
            creator_id = baseURI +"creatorid/" + obj[mapp["id"]]
            objProp[prop_name] = [{"id": creator_id, "name": obj[column], "role": "Artist"}]

    # Fixed values that do not come from the CSV; they replace anything the
    # mapping may have put under the same keys.
    owner = {
        "name": "National Gallery of Art",
        "location": "Washington, D.C., United States",
        "type": "http://vocab.getty.edu/aat/300312281",
        "type_label": "",
    }
    objProp["homepage"] = ""
    objProp["current_owner"] = owner
    return objProp
from lib import linkedart as la
try:
import cromulent
except:
!pip install cromulent
import cromulent
from cromulent.model import factory
# Transform the collection records into Linked Art JSON-LD files and collect
# the (label, file name) pairs offered by the artwork dropdown.
# NOTE(review): indentation is not visible in this listing, so the nesting of
# the statements below has to be confirmed against the original notebook.

# Directory that receives one JSON file per artwork.
outputdir = "./data/nga/output/json/all/"
# list to hold file names for use with jsonld visualisation dropdown
selectOptions = []
# Immediately rebound: the list starts with a placeholder entry whose value is
# the empty string.
selectOptions = [('Please select an artwork', '')]
# Plain Python lists of the image table's object ids and IIIF base URLs.
# dfimgurl_list is not referenced anywhere in the visible code.
dfimg_list = df_images['depictstmsobjectid'].tolist()
dfimgurl_list = df_images['iiifurl'].tolist()
# Processing stops once counter exceeds 100.
counter = 1
# allObjects is the csv.DictReader created earlier in the notebook; as an
# iterator it only yields rows that have not been read yet.
for obj in allObjects:
if counter > 100:
break
# create object property dictionary
objProp = createObjProp(obj,mapp,baseURI)
# NOTE(review): `id` shadows the Python builtin of the same name.
id = objProp["id"]
object_uri = baseURI + id
# Membership test against a list is a linear scan per object; the id is a
# string in the CSV row, hence the int() conversion before comparing.
if int(id) in dfimg_list:
# Take the first image row that depicts this object and append an image
# request suffix to its IIIF base URL.
df_images_match = df_images.loc[df_images['depictstmsobjectid'] == int(id)]
objProp["image_url"] = df_images_match.iloc[0]["iiifurl"] + "/full/!500,500/0/default.jpg"
# NOTE(review): confirm whether the remaining statements run for every object
# or only for objects that have an image (i.e. whether they sit inside the
# `if` above); this also decides what `counter` is counting.
filename = objProp["id"] + ".json"
# Dropdown label is "<title> (<file name>)"; the value is the file name.
selectOptions.append( ( objProp["title"] + " (" + filename + ")" , filename))
# create obj description
# presumably returns a cromulent model object — verify in lib/linkedart
objLA = la.createObjDesc(objProp,la.objTypes,object_uri)
# write to file
# The file is opened and closed manually; an exception in write() would leave
# the handle open.
text_file = open(outputdir + filename, "wt")
n = text_file.write(factory.toString(objLA, compact=False))
text_file.close()
counter = counter + 1
Select an artwork from the dropdown to view
try:
import ipywidgets
except:
%pip install ipywidgets
import ipywidgets
from ipywidgets import Layout, FileUpload
from IPython.display import display, IFrame, HTML, Image
from IPython.core.display import Javascript
import os
try:
import json
except:
%pip install json
import json
def dropdown_eventhandler(change):
    """React to a new selection in the artwork dropdown.

    Renders the JSON-LD visualisation for the selected file and points the
    ``artworknga`` element at the artwork's image, or clears that element when
    the document has no ``representation``.

    Args:
        change: the change notification passed by ``observe``; ``change.new``
            is the newly selected option value (a JSON file name, or the
            empty string for the placeholder entry).
    """
    selected = change.new

    if not selected:
        # Placeholder entry: there is no file behind it, and opening
        # outputdir + "/" + "" would try to read the directory itself and
        # raise. Just clear the image and stop.
        display(Javascript("document.getElementById('artworknga').src = '';"))
        return

    # Load the visualisation script and append the call that renders the
    # selected file into the '#visnga' element.
    with open('./src/js/visld.js', 'r') as _jscript:
        code = _jscript.read() + "var file = '" + outputdir + selected + "';var selector = '#visnga';visjsonld(file, selector); "
    display(Javascript(code))

    # Read the generated document; the handle is released before rendering.
    with open( outputdir + "/" + selected) as json_file:
        artwork = json.load(json_file)

    if "representation" in artwork:
        image = artwork["representation"][0]["id"]
        display(Javascript("document.getElementById('artworknga').src = '" + image + "';"))
    else:
        display(Javascript("document.getElementById('artworknga').src = '';"))
# Artwork picker: a dropdown over the (label, file name) pairs collected
# while the JSON files were written.
selectObject = ipywidgets.Dropdown(options=selectOptions)

# Call the handler whenever the widget's `value` trait changes.
selectObject.observe(handler=dropdown_eventhandler, names='value')

display(selectObject)
Dropdown(options=(('Please select an artwork', ''), ('Studies of Lago Maggiore and and the Entrance to a Palaz…