In [1]:

try:
    import IPython
except:
    %pip install IPython
    import IPython 
from IPython.display import display, IFrame, HTML, Javascript
HTML("""<link rel="stylesheet" type="text/css" href="src/css/notebook.css"/>""")

Out[1]:

Transforming Collections Data to Linked Art¶

National Gallery of Art¶

Input Data¶

The collection data is held in two files:

CSV data file containing artwork description data
CSV data file containing detailed digital image information for artworks https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/published_images.csv

Load NGA Digital Image File into DataFrame¶

The data file containing detailed digital image data is loaded into a pandas dataframe, df_images.

In [4]:

# pandas is imported in this cell so that it also runs on a freshly started
# kernel; no earlier cell shown in this notebook imports it.
import pandas as pd

# Location of the CSV with per-image details (one row per published image).
file_images = "https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/published_images.csv"
df_images = pd.read_csv(file_images)

# Show the first rows as a quick sanity check of the columns that were loaded.
df_images.head()

Out[4]:

	uuid	iiifurl	iiifthumburl	viewtype	width	height	maxpixels	created	modified	depictstmsobjectid	assistivetext
0	00004dec-8300-4487-8d89-562d0126b6a1	https://api.nga.gov/iiif/00004dec-8300-4487-8d...	https://api.nga.gov/iiif/00004dec-8300-4487-8d...	primary	2623	4000	640.0	2010-09-07 15:08:48-04	2022-04-21 12:57:43.657-04	11975	NaN
1	00007f61-4922-417b-8f27-893ea328206c	https://api.nga.gov/iiif/00007f61-4922-417b-8f...	https://api.nga.gov/iiif/00007f61-4922-417b-8f...	primary	3365	4332	NaN	2013-07-05 15:41:08-04	2022-05-23 14:59:28-04	17387	NaN
2	0000bd8c-39de-4453-b55d-5e28a9beed38	https://api.nga.gov/iiif/0000bd8c-39de-4453-b5...	https://api.nga.gov/iiif/0000bd8c-39de-4453-b5...	primary	3500	4688	NaN	2013-08-05 14:31:59-04	2022-05-23 15:05:58-04	19245	NaN
3	0000e5a4-7d32-4c2a-97c6-a6b571c9fd71	https://api.nga.gov/iiif/0000e5a4-7d32-4c2a-97...	https://api.nga.gov/iiif/0000e5a4-7d32-4c2a-97...	primary	2252	3000	NaN	2013-03-18 14:39:55-04	2022-05-17 18:19:25-04	153987	NaN
4	0001668a-dd1c-48e8-9267-b6d1697d43c8	https://api.nga.gov/iiif/0001668a-dd1c-48e8-92...	https://api.nga.gov/iiif/0001668a-dd1c-48e8-92...	primary	3446	4448	NaN	2014-01-02 14:50:50-05	2022-05-23 15:39:38-04	23830	NaN

Remove Byte Order Marks and Define Data Mapping¶

Remove Byte Order Marks and create Python dictionary containing data mapping for each input file.

In [5]:

import csv
import json  # part of the standard library, so no install fallback is needed

# NOTE(review): `file` must hold the path of the artwork CSV. It is not
# assigned in any cell shown here -- confirm an earlier cell defines it.

# Remove BOM: decoding with utf-8-sig drops a leading byte order mark if one is
# present; the text is then written back to the same path as plain UTF-8.
# This rewrites the file on disk in place.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_target:
    bom_target.write(s)

# Preview the first record with a throw-away reader. Iterating `allObjects`
# itself here would consume its first row, and the transformation loop that
# reuses `allObjects` later would then silently skip that record.
with open(file, mode='r', encoding='utf-8') as preview_file:
    for obj in csv.DictReader(preview_file):
        print(json.dumps(obj, indent=2))
        break

# Lazy row iterator shared with the later transformation cell. The underlying
# file handle is deliberately left open because rows are read on demand.
allObjects = csv.DictReader(open(file, mode='r', encoding='utf-8'))

{
  "objectid": "113260",
  "accessioned": "1",
  "accessionnum": "2000.127.20.1-193",
  "locationid": "",
  "title": "Lithographs, Volume 9",
  "displaydate": "",
  "beginyear": "1804",
  "endyear": "1866",
  "visualbrowsertimespan": "1801 to 1825",
  "medium": "book of lithographs",
  "dimensions": "",
  "inscription": "",
  "markings": "",
  "attributioninverted": "Gavarni, Paul",
  "attribution": "Paul Gavarni",
  "creditline": "Ailsa Mellon Bruce Fund",
  "classification": "Volume",
  "subclassification": "",
  "visualbrowserclassification": "volume",
  "parentid": "",
  "isvirtual": "0",
  "departmentabbr": "CG-E",
  "portfolio": "",
  "series": "",
  "volume": "",
  "watermarks": "",
  "lastdetectedmodification": "2020-05-06 22:01:32.06-04",
  "customprinturl": ""
}

Transform to JSON-LD¶

This next step uses the following to transform the collections data to Linked Art JSON-LD

the data mapping
custom coding in createObjProp()
cromulent Python library
custom coding in la including createObjDesc()

The URLs for the artwork digital images are in a separate file. With custom coding in the transformation loop below, the rows in the two collection data files are matched on the artwork identifier (depictstmsobjectid in the image file) to extract the digital image URL, along the lines of:

    matchImages = df_images.query('depictstmsobjectid == ' + objProp["id"] )
    objProp["image_url"] = matchImages["iiifurl"].iloc[0]  + "/full/!500,500/0/default.jpg"

Additional custom code creates a web page URL for the artwork:

objProp["homepage"] = "https://www.nga.gov/collection/art-object-page." + id + ".html"

In [6]:

  
 # Data mapping used by createObjProp(): each key is a property name of the
# intermediate object description, each value is the name of the CSV column
# that supplies it. An empty string means no CSV column is mapped to that
# property.
mapp = {
    "id": "objectid",
    "accession_number": "accessionnum",
    "accession_date": "",
    "classification": "classification",
    "title": "title",
    "alt_title": "",
    "notes": "",
    "date_created": "displaydate",
    "date_created_earliest": "beginyear",
    "date_created_latest": "endyear",
    "created_period": "",
    "created_dynasty": "",
    "created_inscriptions": "",
    "created_notes": "",
    "creator": "attribution",
    "physical_medium": "medium",
    "physical_style": "",
    "physical_technique": "",
    "physical_description": "",
    "physical_dimensions": "dimensions",
    "created_provenance": "",
    "credit_line": "creditline",
    "collection": "departmentabbr",
    "current_status": "",
    "current_owner": "",
    "image_url": "",
    "homepage": "",
}

# display transposed dataframe of data mapping
display(pd.DataFrame(mapp, index=[0]).T)

	0
id	objectid
accession_number	accessionnum
accession_date
classification	classification
title	title
alt_title
notes
date_created	displaydate
date_created_earliest	beginyear
date_created_latest	endyear
created_period
created_dynasty
created_inscriptions
created_notes
creator	attribution
physical_medium	medium
physical_style
physical_technique
physical_description
physical_dimensions	dimensions
created_provenance
credit_line	creditline
collection	departmentabbr
current_status
current_owner
image_url
homepage

In [7]:

# Root URI under which every generated JSON-LD identifier is minted
baseURI = "https://www.nga.gov/collection/"


def createObjProp(obj,mapp,baseURI):
    """Build the property dictionary for one CSV record.

    Parameters
    ----------
    obj : dict
        One record, keyed by CSV column name.
    mapp : dict
        Property name -> CSV column name. A property is filled only when its
        column name equals one of the keys of ``obj``.
    baseURI : str
        Prefix used to build the creator identifier.

    Returns
    -------
    dict
        Mapped properties in the order their source columns appear in ``obj``,
        followed by ``"homepage"`` (always an empty string here) and
        ``"current_owner"`` (a fixed description of the owning institution).
        The ``"creator"`` property is wrapped in a one-element list of dicts
        with ``id``, ``name`` and ``role`` keys.
    """
    record = {}
    for column, value in obj.items():
        # every property whose mapped column is the current CSV column
        targets = [name for name in mapp if column == mapp[name]]
        for name in targets:
            if name != "creator":
                record[name] = value
                continue
            # the creator identifier is derived from the record's own id column
            creator_uri = baseURI + "creatorid/" + obj[mapp["id"]]
            record[name] = [{"id": creator_uri, "name": value, "role": "Artist"}]

    owner = {
        "name": "National Gallery of Art",
        "location": "Washington, D.C., United States",
        "type": "http://vocab.getty.edu/aat/300312281",
        "type_label": "",
    }
    record["homepage"] = ""
    record["current_owner"] = owner
    return record

In [8]:

from lib import linkedart as la


try:
    import cromulent
except:
    !pip install cromulent
    import cromulent
from cromulent.model import factory


outputdir = "./data/nga/output/json/all/"

# list to hold file names for use with jsonld visualisation dropdown
selectOptions = []
selectOptions = [('Please select an artwork', '')]



dfimg_list = df_images['depictstmsobjectid'].tolist()
dfimgurl_list = df_images['iiifurl'].tolist()

counter = 1

for obj in allObjects:
    if counter > 100:
        break
    # create object property dictionary
    objProp = createObjProp(obj,mapp,baseURI)
    
    id = objProp["id"]
    object_uri = baseURI + id
    
    if int(id) in dfimg_list:
        df_images_match = df_images.loc[df_images['depictstmsobjectid'] == int(id)]
        objProp["image_url"] = df_images_match.iloc[0]["iiifurl"] + "/full/!500,500/0/default.jpg"
    
        filename = objProp["id"] + ".json"
        selectOptions.append( ( objProp["title"] + " (" + filename + ")" , filename))
        # create obj description
        objLA = la.createObjDesc(objProp,la.objTypes,object_uri)
    
    
        # write to file 
        text_file = open(outputdir + filename, "wt")
        n = text_file.write(factory.toString(objLA, compact=False))
   
        text_file.close()
        counter = counter + 1
    

Explore the Linked Art JSON-LD files¶

Select an artwork from the dropdown to view

the artwork image
a visualisation of the Linked Art JSON-LD representation created above

In [9]:

try:
    import ipywidgets
except:
    %pip install ipywidgets
    import ipywidgets

from ipywidgets import Layout, FileUpload 
from IPython.display import display, IFrame, HTML, Image
from IPython.core.display import Javascript        
 
import os

try:
    import json
except:
    %pip install json
    import json 
    
   
def dropdown_eventhandler(change):
    """React to a new dropdown selection.

    Parameters
    ----------
    change
        Change notification passed by the widget; only ``change.new`` (the
        newly selected option value, a JSON file name inside ``outputdir``)
        is read.

    For a non-empty selection the handler

    1. emits the contents of ``./src/js/visld.js`` followed by a call to
       ``visjsonld(file, selector)`` for the selected file and the
       ``#visnga`` selector (``visjsonld`` is presumably defined by that
       script -- it is not visible from this notebook), and
    2. loads the selected JSON file and points the ``artworknga`` element at
       the id of its first ``representation`` entry, or clears the element's
       ``src`` when the document has no ``representation`` key.
    """
    # The placeholder option carries an empty value. Without this guard the
    # handler would try to open the output directory itself as a JSON file
    # and raise.
    if not change.new:
        return

    with open('./src/js/visld.js', 'r') as _jscript:
        code = _jscript.read() + "var file = '" + outputdir + change.new + "';var selector = '#visnga';visjsonld(file, selector); "
    display(Javascript(code))

    # the handle is only needed while parsing, so close it before rendering
    with open( outputdir + "/" + change.new) as json_file:
        artwork = json.load(json_file)

    if ("representation" in artwork):
        image = artwork["representation"][0]["id"]
        display(Javascript("document.getElementById('artworknga').src = '" + image   +   "';"))
    else:
        display(Javascript("document.getElementById('artworknga').src = '';"))


# Build the dropdown from the options collected during the transformation and
# call the handler whenever its selected value changes.
selectObject = ipywidgets.Dropdown(options=selectOptions)
selectObject.observe(dropdown_eventhandler, names='value')

display(selectObject)

Dropdown(options=(('Please select an artwork', ''), ('Studies of Lago Maggiore and and the Entrance to a Palaz…

In [ ]:

	objectid	accessioned	accessionnum	locationid	title	displaydate	beginyear	endyear	visualbrowsertimespan	medium	...	visualbrowserclassification	parentid	departmentabbr	portfolio	series	volume	watermarks	lastdetectedmodification	customprinturl
0	113260	1	2000.127.20.1-193	NaN	Lithographs, Volume 9	NaN	1804.0	1866.0	1801 to 1825	book of lithographs	...	volume	NaN	CG-E	NaN	NaN	NaN	NaN	2020-05-06 22:01:32.06-04	NaN
1	113833	1	2000.127.3.1-172	NaN	Lithographs, Volume 12	NaN	1804.0	1866.0	1801 to 1825	book of lithographs	...	volume	NaN	CG-E	NaN	NaN	NaN	NaN	2020-05-06 22:01:32.06-04	NaN
2	114640	1	2000.127.8.1-110	NaN	Lithographs, Volume 17	NaN	1804.0	1866.0	1801 to 1825	book of lithographs	...	volume	NaN	CG-E	NaN	NaN	NaN	NaN	2020-05-06 22:01:32.06-04	NaN
3	114855	1	2000.127.10.1-28	NaN	Lithographs, Volume 19	NaN	1804.0	1866.0	1801 to 1825	book of lithographs	...	volume	NaN	CG-E	NaN	NaN	NaN	NaN	2020-05-06 22:01:32.06-04	NaN
4	119191	1	2001.100.2.b	NaN	Studies of Lago Maggiore and and the Entrance ...	c. 1700	1700.0	1700.0	1651 to 1700	brown ink over graphite on laid paper	...	drawing	119190.0	CG-E	NaN	NaN	NaN	NaN	2019-10-28 22:01:34.883-04	NaN