try:
import IPython
except:
%pip install IPython
import IPython
from IPython.display import display, IFrame, HTML, Javascript
from IPython.core.display import display, HTML
HTML("""<link rel="stylesheet" type="text/css" href="src/css/notebook.css"/>""")
try:
import pandas as pd
except:
%pip install pandas
import pandas as pd
# Location of the collection CSV export used throughout the notebook.
file = './data/cma/input/data.csv'

# Remove a UTF-8 byte-order mark, if present: decoding with 'utf-8-sig'
# drops it, and re-encoding as plain 'utf-8' writes the text back without
# one.  newline='' switches off newline translation in both directions so
# quoted multi-line CSV fields are written back exactly as they were read,
# and the `with` blocks guarantee each handle is closed before the file is
# opened again.
with open(file, mode='r', encoding='utf-8-sig', newline='') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8', newline='') as bom_free:
    bom_free.write(s)

# Load only the first data row: enough to inspect the available columns.
mpg = pd.read_csv(file, nrows=1, low_memory=False)
mpg.head()
id | accession_number | share_license_status | tombstone | current_location | title | title_in_original_language | series | series_in_original_language | creation_date | ... | digital_description | wall_description | external_resources | citations | catalogue_raisonne | url | image_web | image_print | image_full | updated_at | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 74539 | 2015.449 | CC0 | A Miller's Carriage, c. 1895. Albert-Charles L... | NaN | A Miller's Carriage | NaN | NaN | NaN | c. 1895 | ... | NaN | NaN | {'wikidata': [], 'internet_archive': ['https:/... | NaN | NaN | https://clevelandart.org/art/2015.449 | https://openaccess-cdn.clevelandart.org/2015.4... | https://openaccess-cdn.clevelandart.org/2015.4... | https://openaccess-cdn.clevelandart.org/2015.4... | 2021-06-29 06:35:50.572000 |
1 rows × 41 columns
The Python `csv` module (including `csv.DictReader`) is used to read the CSV file and create an object that maps the information in each row to a `dict` whose keys are the field names. An example record from the `dict` object is shown below. Note that the field names are now dictionary keys.
Python CSV module https://docs.python.org/3/library/csv.html
try:
import csv
except:
%pip install csv
import csv
try:
import json
except:
%pip install json
import json
# newline='' is what the csv module asks for when it is handed a file
# object; without it, newlines embedded in quoted fields may be misread.
# The handle is deliberately left open: a later cell keeps iterating
# `allObjects`, so closing it here would break that loop.
allObjects = csv.DictReader(open(file, mode='r', encoding='utf-8', newline=''))

# Pretty-print the first record only, to show the field names as dict keys.
for obj in allObjects:
    print(json.dumps(obj, indent=2))
    break
{ "id": "74539", "accession_number": "2015.449", "share_license_status": "CC0", "tombstone": "A Miller's Carriage, c. 1895. Albert-Charles Lebourg (French, 1849-1928). Black and white chalk with stumping ; sheet: 33.2 x 49.7 cm (13 1/16 x 19 9/16 in.). The Cleveland Museum of Art, Bequest of Muriel Butkin 2015.449", "current_location": "", "title": "A Miller's Carriage", "title_in_original_language": "", "series": "", "series_in_original_language": "", "creation_date": "c. 1895", "creation_date_earliest": "1890", "creation_date_latest": "1900", "creators": "Albert-Charles Lebourg (French, 1849-1928), artist", "culture": "France, 19th-20th century", "technique": "Black and white chalk with stumping ", "support_materials": "gray laid paper", "department": "Drawings", "collection": "DR - French", "type": "Drawing", "measurements": "Sheet: 33.2 x 49.7 cm (13 1/16 x 19 9/16 in.)", "state_of_the_work": "", "edition_of_the_work": "", "creditline": "Bequest of Muriel Butkin", "copyright": "", "inscriptions": "Verso: upper right in green ink: 80355 / circular estate stamp (Lugt 1664b) ", "exhibitions": " **LEGACY: Exposition de Lebourg, Mus\u00e9e de Rouen, 1932", "provenance": "", "find_spot": "", "related_works": "", "former_accession_numbers": "[]", "fun_fact": "", "digital_description": "", "wall_description": "", "external_resources": "{'wikidata': [], 'internet_archive': ['https://archive.org/details/clevelandart-2015.449-a-miller-s-carriage']}", "citations": "", "catalogue_raisonne": "", "url": "https://clevelandart.org/art/2015.449", "image_web": "https://openaccess-cdn.clevelandart.org/2015.449/2015.449_web.jpg", "image_print": "https://openaccess-cdn.clevelandart.org/2015.449/2015.449_print.jpg", "image_full": "https://openaccess-cdn.clevelandart.org/2015.449/2015.449_full.tif", "updated_at": "2021-06-29 06:35:50.572000" }
The next step involves a manual mapping process in which entities in the collections data are mapped to their corresponding representation in Linked Art.
This is achieved with a Python dictionary, `mapp`, in which the field names in the input data are mapped to variable names that are used later in a mapping/transformation function.
The `mapp` dictionary is shown below and is also shown in table format beneath:
# Mapping from the variable names used by the transformation functions (keys)
# to the column names of the collection CSV (values).  An empty string means
# the collection data has no column for that variable.
mapp = {
    # identifiers
    "id":                    "id",
    "accession_number":      "accession_number",
    "accession_date":        "",
    # classification and titles
    "classification":        "type",
    "title":                 "title",
    "alt_title":             "title_in_original_language",
    "notes":                 "tombstone",
    # creation
    "date_created":          "creation_date",
    "date_created_earliest": "creation_date_earliest",
    "date_created_latest":   "creation_date_latest",
    "created_period":        "culture",
    "created_dynasty":       "",
    "created_inscriptions":  "inscriptions",
    "created_notes":         "fun_fact",
    "creator":               "creators",
    # physical characteristics
    # NOTE(review): no column named "Medium" appears in the sample record
    # printed earlier in this notebook -- confirm the intended source column.
    "physical_medium":       "Medium",
    "physical_style":        "",
    "physical_technique":    "technique",
    "physical_description":  "",
    "physical_dimensions":   "measurements",
    # ownership, location and web presence
    "created_provenance":    "provenance",
    "credit_line":           "creditline",
    "collection":            "department",
    "current_status":        "current_location",
    "current_location":      "current_location",
    "homepage":              "url",
}
The `mapp` dictionary will now be used, together with the collection data, to create a dictionary containing key:value pairs for an artwork described in the collection data, where each key is the variable name used in a transformation function later on, and each value is the collection data property value for the artwork.
The process involves:
- taking the `mapp` dictionary and a record from the collection data as input
- creating the `objProp` dictionary with the artwork properties mapped, via a call to `createObjProp`
An example of the resulting `objProp` dictionary can be viewed in a table below.
def createObjProp(obj, mapp):
    """Build the property dictionary for one CSV record.

    Args:
        obj: one record, keyed by CSV column name.
        mapp: mapping of transformation variable name -> CSV column name.

    Returns:
        A dict keyed by transformation variable name.  Entries are added in
        the order the columns appear in ``obj``; the "creator" entry is
        wrapped in a dict with "name", "id" and "role" keys, and a fixed
        "current_owner" entry is appended last.
    """
    objProp = {}
    for column in obj:
        # Several variables may read from the same column, so every mapping
        # entry is checked rather than stopping at the first match.
        for variable, source_column in mapp.items():
            if source_column != column:
                continue
            if variable == "creator":
                objProp[variable] = {"name": obj[column], "id": "", "role": "artist"}
            else:
                objProp[variable] = obj[column]

    # The owner is not read from the record; it is the same for every row.
    objProp["current_owner"] = {
        "name": "Cleveland Museum of Art",
        "location": "Cleveland,Ohio",
        "type": "http://vocab.getty.edu/aat/300312281",
        "type_label": "",
    }
    return objProp
# Start from an empty result so `objProp` is defined even when the reader
# has no rows left.
objProp = {}
for obj in allObjects:
    # Only the next unread record is mapped; the loop stops straight after.
    objProp = createObjProp(obj, mapp)
    break
The following code is usually kept in a separate file, but is shown here for illustration.
The code includes `cromulent`, a Python library written by Rob Sanderson; further information is available at https://github.com/linked-art/crom
There are also functions that take the `objProp` dictionary as input and transform a description of artworks in the collections data into a representation in Linked Art JSON-LD.
try:
import cromulent
except:
%pip install cromulent
import cromulent
from cromulent.model import factory, Actor, Production, BeginningOfExistence, EndOfExistence, TimeSpan, Place
from cromulent.model import InformationObject, Phase, VisualItem
from cromulent.vocab import Painting, Drawing,Miniature,add_art_setter, PrimaryName, Name, CollectionSet, instances, Sculpture
from cromulent.vocab import aat_culture_mapping, AccessionNumber, Height, Width, SupportPart, Gallery, MuseumPlace
from cromulent.vocab import BottomPart, Description, RightsStatement, MuseumOrg, Purchase
from cromulent.vocab import Furniture, Mosaic, Photograph, Coin, Vessel, Graphic, Enamel, Embroidery, PhotographPrint
from cromulent.vocab import PhotographAlbum, PhotographBook, PhotographColor, PhotographBW, Negative, Map, Clothing, Furniture
from cromulent.vocab import Sample, Architecture, Armor, Book, DecArts, Implement, Jewelry, Manuscript, SiteInstallation, Text, Print
from cromulent.vocab import TimeBasedMedia, Page, Folio, Folder, Box, Envelope, Binder, Case, FlatfileCabinet
from cromulent.vocab import HumanMadeObject,Tapestry,LocalNumber
from cromulent.vocab import Type,Set
from cromulent.vocab import TimeSpan, Actor, Group, Acquisition, Place
from cromulent.vocab import Production, TimeSpan, Actor
from cromulent.vocab import LinguisticObject,DigitalObject, DigitalService
"""
Dictionary of Object Types defined in Linked Art
"""
# Lookup from an object-type name to an instance of the matching cromulent
# vocabulary class.  createObjDesc tests each key, in this order, as a
# substring of the artwork's classification.
objTypes = {
    # fine and decorative art
    "Painting":         Painting(),
    "Sculpture":        Sculpture(),
    "Drawing":          Drawing(),
    "Miniature":        Miniature(),
    "Tapestry":         Tapestry(),
    "Furniture":        Furniture(),
    "Furnishings":      DecArts(),
    "Mosaic":           Mosaic(),
    # photography and graphic works
    "Photograph":       Photograph(),
    "Coin":             Coin(),
    "Vessel":           Vessel(),
    "Graphic":          Graphic(),
    "Enamel":           Enamel(),
    "Embroidery":       Embroidery(),
    "PhotographPrint":  PhotographPrint(),
    "PhotographAlbum":  PhotographAlbum(),
    "PhotographBook":   PhotographBook(),
    "PhotographColor":  PhotographColor(),
    "PhotographBW":     PhotographBW(),
    "Negative":         Negative(),
    # other object types
    "Map":              Map(),
    "Clothing":         Clothing(),
    "Sample":           Sample(),
    "Architecture":     Architecture(),
    "Armor":            Armor(),
    "Book":             Book(),
    "DecArts":          DecArts(),
    "Implement":        Implement(),
    "Jewelry":          Jewelry(),
    "Manuscript":       Manuscript(),
    "SiteInstallation": SiteInstallation(),
    "Text":             Text(),
    "Print":            Print(),
    "TimeBasedMedia":   TimeBasedMedia(),
    # documents and containers
    "Page":             Page(),
    "Folio":            Folio(),
    "Folder":           Folder(),
    "Box":              Box(),
    "Envelope":         Envelope(),
    "Binder":           Binder(),
    "Case":             Case(),
    "FlatfileCabinet":  FlatfileCabinet(),
}
def createObjProp(obj, docProp):
    """Build the property dictionary from a record made of atoms and tables.

    NOTE(review): this redefines the ``createObjProp`` declared earlier in
    the file with a different input shape; whichever definition ran last is
    the one in effect.

    Args:
        obj: record with an "atom" list (items carry "@name" and, optionally,
            "#text") and a "table" list (items carry "@name" and "tuple").
        docProp: mapping of atom name -> transformation variable name.

    Returns:
        A dict that always holds a "creator" list, plus one entry per mapped
        atom and, when an "AltTitles" table is present, an "alt_title" entry.
    """
    objProp = {"creator": []}

    # Simple values: copy every atom whose name is mapped; a missing "#text"
    # is stored as an empty string.
    for atom in obj["atom"]:
        atom_name = atom["@name"]
        atom_text = atom["#text"] if "#text" in atom else ""
        if atom_name in docProp:
            objProp[docProp[atom_name]] = atom_text

    # Which creator field each atom of a "Creator1" table fills.
    creator_fields = {"CreRole": "role", "irn": "id", "SummaryData": "name"}

    for table in obj["table"]:
        table_name = table["@name"]

        # alternative titles
        if table_name == "AltTitles":
            objProp["alt_title"] = table["tuple"]["atom"]["#text"]

        # creators: one entry per "Creator1" table, blank when it has no atoms
        if table_name == "Creator1":
            creator = {"id": "", "name": "", "role": ""}
            if "atom" in table["tuple"]:
                for atom in table["tuple"]["atom"]:
                    field = creator_fields.get(atom["@name"])
                    if field is not None:
                        creator[field] = atom["#text"]
            objProp["creator"].append(creator)

    return objProp
def objPrimaryname(objProp, object_uri):
    """Return the artwork's primary name, or None when it has no title.

    The previous version also read ``objProp["id"]`` into an unused local
    (shadowing the ``id`` builtin), which could raise KeyError for a value
    the function never needed; that lookup has been removed.

    Args:
        objProp: mapped artwork properties; "title" is read.
        object_uri: URI of the artwork, used as the prefix of the name's URI.

    Returns:
        A ``PrimaryName`` identified by ``<object_uri>/primary-name`` whose
        value is the title, or None when the title is missing or empty.
    """
    title = objProp.get("title", "")
    if not title:
        # A name with no content carries no information; callers already
        # skip None results.
        return None
    return PrimaryName(object_uri + "/primary-name", value=title)
def objAlternatename(objProp, object_uri):
    """Return the artwork's alternate name, or None when it has none.

    Fixes two defects in the previous version: the created object was bound
    to ``alternatename`` while ``alternateName`` (always None) was returned,
    and ``AlternateName`` was never imported.

    Args:
        objProp: mapped artwork properties; "alt_title" is read if present.
        object_uri: URI of the artwork, used as the prefix of the name's URI.

    Returns:
        An ``AlternateName`` identified by ``<object_uri>/alternate-name``,
        or None when "alt_title" is missing or empty.
    """
    alt_title = objProp.get("alt_title", "")
    if not alt_title:
        return None
    # Imported locally because the file's import section does not bring
    # AlternateName into scope.
    from cromulent.vocab import AlternateName
    return AlternateName(object_uri + "/alternate-name", value=alt_title)
def objHomepage(objProp, object_uri):
    """Return a description of the artwork's home page, or None.

    Fixes the first classification URI, which was written as
    ``http://vocab/getty.edu/...`` (slash instead of dot after "vocab"), and
    tolerates a missing "homepage" entry instead of raising KeyError.

    Args:
        objProp: mapped artwork properties; "homepage" holds the page URL.
        object_uri: URI of the artwork (unused; kept for a uniform signature).

    Returns:
        A ``LinguisticObject`` identified by the home page URL, classified as
        a web page and a home page with format "text/html", or None when no
        home page URL is available.
    """
    homepageId = objProp.get("homepage", "")
    if not homepageId:
        return None
    homepage = LinguisticObject(homepageId, label="Homepage for the Object")
    homepage.classified_as = Type("http://vocab.getty.edu/aat/300264578", label="Web pages (documents)")
    homepage.classified_as = Type("http://vocab.getty.edu/aat/300266277", label="home pages")
    homepage.format = "text/html"
    return homepage
def objProvenance(objProp, object_uri):
    """Return the provenance statement for the artwork, or None.

    Args:
        objProp: mapped artwork properties; "created_provenance" is read.
        object_uri: URI of the artwork, used as the prefix of the statement URI.

    Returns:
        A ``LinguisticObject`` identified by
        ``<object_uri>/provenance-statement``, or None when the property is
        absent or an empty string.
    """
    if "created_provenance" not in objProp:
        return None
    statement_text = objProp["created_provenance"]
    if statement_text == "":
        return None

    statement = LinguisticObject(
        object_uri + "/provenance-statement",
        value=statement_text,
        label="Provenance Statement about the Object",
    )
    statement.classified_as = Type("http://vocab.getty.edu/aat/300055863", label="provenance (history of ownership)")
    statement.classified_as = Type("http://vocab.getty.edu/aat/300418049", label="brief texts")
    return statement
def objAccession(objProp, object_uri):
    """Return the accession number identifier, or None when it is empty.

    Args:
        objProp: mapped artwork properties; "accession_number" must exist.
        object_uri: URI of the artwork (not used by this function).

    Returns:
        An ``AccessionNumber`` built from the accession number, which is
        passed both as the identifier argument and as the value; None when
        the accession number is an empty string.
    """
    number = objProp["accession_number"]
    if number == "":
        return None
    return AccessionNumber(number, value=number)
def objLocalnumber(objProp, object_uri):
    """Return the local (system) number identifier, or None when it is empty.

    Args:
        objProp: mapped artwork properties; "id" must exist.
        object_uri: URI of the artwork (not used by this function).

    Returns:
        A ``LocalNumber`` built from ``str(objProp["id"])``, which is passed
        both as the identifier argument and as the value; None when that
        string is empty.
    """
    local_id = str(objProp["id"])
    if local_id == "":
        return None
    return LocalNumber(local_id, value=local_id)
def objCollection(objProp, object_uri):
    """Return the collection (set) the artwork belongs to, or None.

    An empty collection name is now skipped; previously it produced a Set
    whose URI ended in ``/collection/`` and whose label was empty.

    Args:
        objProp: mapped artwork properties; "collection" is read if present.
        object_uri: URI of the artwork, used as the prefix of the set's URI.

    Returns:
        A ``Set`` identified by ``<object_uri>/collection/<name>``, labelled
        with the collection name and classified as a collection, or None
        when no collection name is available.
    """
    collection = objProp.get("collection", "")
    if not collection:
        return None
    coll = Set(object_uri + "/collection/" + collection, label=collection)
    coll.classified_as = Type("http://vocab.getty.edu/aat/300025976",
                              label="collections (object groupings)")
    return coll
def objCredit(objProp, object_uri):
    """Return the credit line statement for the artwork, or None.

    Args:
        objProp: mapped artwork properties; "credit_line" is read if present.
        object_uri: URI of the artwork, used as the prefix of the statement URI.

    Returns:
        A ``LinguisticObject`` identified by ``<object_uri>/credit-line``,
        or None when the property is absent or an empty string.
    """
    if "credit_line" not in objProp:
        return None
    line_text = objProp["credit_line"]
    if line_text == "":
        return None

    statement = LinguisticObject(
        object_uri + "/credit-line",
        value=line_text,
        label="Credit Line for the Object",
    )
    statement.classified_as = Type("http://vocab.getty.edu/aat/300026687", label="acknowledgements")
    statement.classified_as = Type("http://vocab.getty.edu/aat/300418049", label="brief texts")
    return statement
def objProduction(objProp, object_uri):
    """Return the production activity for the artwork, or None.

    Changes from the previous version:
      * "creator" may be a single dict or a list of dicts.  A list used to
        be ignored silently, because the loop compared its items with the
        strings "id" and "name".
      * empty earliest/latest bounds are no longer assigned to the timespan.
      * the local no longer shadows the ``id`` builtin.

    Args:
        objProp: mapped artwork properties.  Reads "date_created",
            "date_created_earliest", "date_created_latest" and "creator".
        object_uri: URI of the artwork, used as the prefix of the URIs minted
            for the production and its timespan.

    Returns:
        A ``Production`` with a timespan and, for every creator that has a
        non-empty name, an ``Actor``; None when "date_created" is absent.
    """
    if "date_created" not in objProp:
        return None

    prod = Production(object_uri + "/production", label="Production of the Object")

    # The display date doubles as the timespan label.
    timespan = TimeSpan(object_uri + "/production/timespan",
                        label=objProp["date_created"] or "date unknown")
    # NOTE(review): the bounds are passed through exactly as found in the
    # data (e.g. a bare year); Linked Art presumably expects full ISO 8601
    # timestamps here -- confirm before publishing the output.
    earliest = objProp.get("date_created_earliest", "")
    if earliest:
        timespan.begin_of_the_begin = earliest
    latest = objProp.get("date_created_latest", "")
    if latest:
        timespan.end_of_the_end = latest
    prod.timespan = timespan

    creators = objProp.get("creator") or []
    if isinstance(creators, dict):
        creators = [creators]
    for creator in creators:
        if not isinstance(creator, dict):
            continue
        name = creator.get("name", "")
        if name:
            # An empty identifier is passed through unchanged, as before.
            # With several creators this assignment runs once per creator;
            # presumably cromulent accumulates the values -- confirm.
            prod.carried_out_by = Actor(creator.get("id", ""), name)
    return prod
def objCurrentowner(objProp, object_uri):
    """Return the group that currently owns the artwork, or None.

    Args:
        objProp: mapped artwork properties; "current_owner" is a dict whose
            "name" entry supplies the label.
        object_uri: URI of the artwork, forwarded to ``objAcquisition``.

    Returns:
        A ``Group`` carrying a fixed ULAN identifier, labelled with the owner
        name and classified as a museum.  When ``objAcquisition`` returns an
        acquisition it is attached as ``acquired_title_through``.  None when
        there is no owner entry or its name is empty.
    """
    if "current_owner" not in objProp or objProp["current_owner"]["name"] == "":
        return None

    owner_name = objProp["current_owner"]["name"]
    owner_group = Group("http://vocab.getty.edu/ulan/500300517", label=owner_name)
    owner_group.classified_as = Type("http://vocab.getty.edu/aat/300312281", label="museums (institutions)")

    acquisition = objAcquisition(objProp, object_uri)
    if acquisition is not None:
        owner_group.acquired_title_through = acquisition
    return owner_group
def objAcquisition(objProp, object_uri):
    """Return the acquisition event for the artwork, or None.

    Changes from the previous version: the place label is read from either
    "name_location" (the key this function always used) or "location" (the
    key the CSV ``createObjProp`` in this file writes), and a missing
    "current_owner" entry no longer raises KeyError.

    Args:
        objProp: mapped artwork properties; reads "accession_date" and the
            "current_owner" dict.
        object_uri: URI of the artwork, used as the prefix of the URI minted
            for the acquisition.

    Returns:
        An ``Acquisition`` with a classification, an optional place and a
        timespan built by ``objAcquisitionTimespan``; None when there is no
        accession date.
    """
    accession_date = objProp.get("accession_date", "")
    if accession_date is None or accession_date == "":
        return None

    acquisition = Acquisition(object_uri + "/IMA-acquisition", label="Acquisition of the Object")
    acquisition.classified_as = Type("http://vocab.getty.edu/aat/300157782",
                                     label="acquisition (collections management)")

    owner = objProp.get("current_owner")
    if not isinstance(owner, dict):
        owner = {}
    place_label = owner.get("name_location") or owner.get("location")
    if place_label:
        # NOTE(review): the TGN identifier and the "IMA-" URI segment are
        # hard-coded -- confirm they fit the institution being described.
        acquisition.took_place_at = Place("http://vocab.getty.edu/tgn/7012924",
                                          label=place_label)

    acquisition.timespan = objAcquisitionTimespan(object_uri, accession_date)
    return acquisition
def objAcquisitionTimespan(object_uri, accession_date):
    """Return a timespan covering an accession date of varying precision.

    Fixes the month-precision branch of the previous version, which
    evaluated ``(tuple of strings) in accession_date`` (a TypeError), used a
    bare tuple as a condition (always true, so "30" was always appended),
    ignored leap years and ended with a suffix that differed from the other
    branches.  The last day of the month now comes from
    ``calendar.monthrange``.

    Supported forms of ``accession_date`` (after ``str()``):
      * "YYYY"                  -> 1 January to 31 December of that year
      * "YYYY-MM" / "YYYY-MM-"  -> first to last day of that month
      * 10 characters (e.g. "YYYY-MM-DD") -> that single day
    Any other form yields a timespan that carries only the label.

    Args:
        object_uri: URI of the artwork, used as the prefix of the timespan URI.
        accession_date: the accession date; converted with ``str()``.

    Returns:
        A ``TimeSpan`` labelled with the accession date, with
        ``begin_of_the_begin`` / ``end_of_the_end`` set when the date could
        be interpreted.
    """
    import calendar  # standard library; only this function needs it

    accession_date = str(accession_date)
    midnight = "T00:00:00.000Z"
    begin = end = ""

    if len(accession_date) == 4:
        begin = accession_date + "-01-01" + midnight
        end = accession_date + "-12-31" + midnight
    elif len(accession_date) in (7, 8):
        # Month precision, with or without the trailing dash.
        parts = accession_date.rstrip("-").split("-")
        if len(parts) == 2 and parts[0].isdigit() and parts[1].isdigit():
            year, month = int(parts[0]), int(parts[1])
            if 1 <= month <= 12:
                last_day = calendar.monthrange(year, month)[1]
                month_prefix = "{:04d}-{:02d}-".format(year, month)
                begin = month_prefix + "01" + midnight
                end = month_prefix + "{:02d}".format(last_day) + midnight
    elif len(accession_date) == 10:
        begin = accession_date + midnight
        end = accession_date + midnight

    timespan = TimeSpan(object_uri + "/IMA-acquisition/timespan", label=accession_date)
    if begin != "":
        timespan.begin_of_the_begin = begin
    if end != "":
        timespan.end_of_the_end = end
    return timespan
def objCustody(objProp, object_uri):
    """Return the group holding the artwork when its status is not an owned one.

    Args:
        objProp: mapped artwork properties; reads "current_status" and the
            "current_owner" dict ("name", "type", "type_label").
        object_uri: URI of the artwork (not used by this function).

    Returns:
        A ``Group`` labelled with the owner name and classified with the
        owner's type, built only when the status is non-empty and
        ``checkCurrentOwner`` reports False for it; otherwise None.
    """
    if "current_status" not in objProp or objProp["current_status"] == "":
        return None
    if checkCurrentOwner(objProp["current_status"]):
        return None

    owner = objProp["current_owner"]
    keeper = Group(label=owner["name"])
    keeper.classified_as = Type(owner["type"], label=owner["type_label"])
    return keeper
def objDigitalObject(objProp, object_uri):
    """Return a visual item describing the artwork's digital image.

    Args:
        objProp: mapped artwork properties; "image_url" must exist.
        object_uri: URI of the artwork (not used by this function).

    Returns:
        A ``VisualItem`` identified by the image URL and shown by a
        ``DigitalObject`` for the same URL.  When the URL contains "iiif"
        (case-insensitive) the digital object is also linked to a
        ``DigitalService`` that conforms to the IIIF image API.
    """
    image_url = objProp["image_url"]

    visual_item = VisualItem(image_url, label="Digital image of artwork")

    image = DigitalObject(image_url, label="Image of artwork")
    image.classified_as = Type("http://vocab.getty.edu/aat/300215302", "Digital Image")
    image.access_point = DigitalObject(image_url)

    is_iiif = "iiif" in image_url.lower()
    if is_iiif:
        service = DigitalService()
        service.access_point = DigitalObject(image_url)
        service.conforms_to = InformationObject("http://iiif.io/api/image")
        image.digitally_available_via = service

    visual_item.digitally_shown_by = image
    return visual_item
def createObjDesc(objProp, objTypes, object_uri):
    """Build the Linked Art description of one artwork.

    The previous version returned (and mutated) the very instance stored in
    ``objTypes``, so describing a second artwork of the same type would have
    modified the first one's object.  A fresh instance of the matching class
    is now created on every call, which also makes the old "reset to None"
    assignments unnecessary.  A missing "classification" falls back to a
    plain ``HumanMadeObject`` instead of raising KeyError.

    Args:
        objProp: mapped artwork properties (see the ``obj*`` helpers for the
            keys each part reads); "title" is required.
        objTypes: mapping of type name -> prototype instance (or class).  The
            first key that is a substring of the classification wins.
        object_uri: URI assigned to the artwork.

    Returns:
        The populated Linked Art object.
    """
    # Pick the model class from the classification text.
    classification = objProp.get("classification") or ""
    objLA = None
    for otype, prototype in objTypes.items():
        if otype in classification:
            model_class = prototype if isinstance(prototype, type) else type(prototype)
            objLA = model_class()
            break
    if objLA is None:
        objLA = HumanMadeObject()

    objLA.id = object_uri
    objLA._label = objProp["title"]

    # DIGITAL OBJECT
    if objProp.get("image_url", "") != "":
        dig = objDigitalObject(objProp, object_uri)
        if dig is not None:
            objLA.representation = []
            objLA.representation.append(dig)

    # IDENTIFIED_BY
    identifiers = [
        identifier
        for identifier in (
            objAccession(objProp, object_uri),
            objLocalnumber(objProp, object_uri),
            objPrimaryname(objProp, object_uri),
        )
        if identifier is not None
    ]
    if identifiers:
        objLA.identified_by = []
        for identifier in identifiers:
            objLA.identified_by.append(identifier)

    # REFERRED_TO_BY: provenance first, then credit line
    statements = [
        statement
        for statement in (
            objProvenance(objProp, object_uri),
            objCredit(objProp, object_uri),
        )
        if statement is not None
    ]
    if statements:
        objLA.referred_to_by = []
        for statement in statements:
            objLA.referred_to_by.append(statement)

    # SUBJECT_OF
    homepage = objHomepage(objProp, object_uri)
    if homepage is not None:
        objLA.subject_of = homepage

    # PRODUCED_BY
    if "creator" in objProp:
        prod = objProduction(objProp, object_uri)
        if prod is not None:
            objLA.produced_by = prod

    # MEMBER_OF
    if "collection" in objProp:
        coll = objCollection(objProp, object_uri)
        if coll is not None:
            objLA.member_of = coll

    # CURRENT_KEEPER
    # NOTE(review): the custodian is stored under `current_owner`, exactly as
    # before -- confirm whether a dedicated custodian property was intended.
    custody = objCustody(objProp, object_uri)
    if custody is not None:
        objLA.current_owner = custody

    # CURRENT_OWNER
    if "current_owner" in objProp and objProp["current_owner"] != "":
        current_owner = objCurrentowner(objProp, object_uri)
        if current_owner is not None:
            objLA.current_owner = current_owner

    return objLA
def checkCurrentOwner(current_status):
    """Report whether a status string denotes an owned object.

    Args:
        current_status: status text taken from the collection data.

    Returns:
        True when the status is exactly 'Accessioned' or 'Partial Accession',
        or contains 'IMA-Owned'; False otherwise, including for an empty
        string.
    """
    if current_status == "":
        return False
    owned_statuses = ('Accessioned', 'Partial Accession')
    exact_match = any(status == current_status for status in owned_statuses)
    return exact_match or 'IMA-Owned' in current_status
Making use of the code above, the final part of the transformation process takes the `objProp` dictionary with the mapped artwork properties and returns Linked Art JSON-LD, which is then saved to a file.
# baseURI for JSON-LD document
baseURI = "https://clevelandart.org/art/"

# `obj` is the raw record that produced `objProp`; its identifier column is
# looked up through the mapping.  The name `id` shadows the builtin but is
# kept in case other cells refer to it.
id = obj[mapp.get("id")]
object_uri = baseURI + id

objLA = createObjDesc(objProp, objTypes, object_uri)

# write to file
# The `with` block closes the file even if serialisation fails, and the
# explicit encoding keeps the output independent of the platform default.
with open(id + ".json", "wt", encoding="utf-8") as text_file:
    n = text_file.write(factory.toString(objLA, compact=False))
An example of the Linked Art JSON-LD representation of an artwork is shown below.
To more easily view the representation, you may like to copy and paste the text below into the JSON-LD Playground website that offers a visualisation option - visit https://json-ld.org/playground/
# Serialise the Linked Art object and show the JSON-LD text.
linked_art_json = factory.toString(objLA, compact=False)
print(linked_art_json)
{ "@context": "https://linked.art/ns/v1/linked-art.json", "id": "https://clevelandart.org/art/74540", "type": "HumanMadeObject", "_label": "Leda and the Swan", "classified_as": { "id": "http://vocab.getty.edu/aat/300033973", "type": "Type", "_label": "Drawing", "classified_as": { "id": "http://vocab.getty.edu/aat/300435443", "type": "Type", "_label": "Type of Work" } }, "identified_by": [ { "id": "http://lod.example.org/museum/Identifier/2015.451", "type": "Identifier", "classified_as": { "id": "http://vocab.getty.edu/aat/300312355", "type": "Type", "_label": "Accession Number" }, "content": "2015.451" }, { "id": "http://lod.example.org/museum/Identifier/74540", "type": "Identifier", "classified_as": { "id": "http://vocab.getty.edu/aat/300404621", "type": "Type", "_label": "Owner-Assigned Number" }, "content": "74540" }, { "id": "https://clevelandart.org/art/74540/primary-name", "type": "Name", "classified_as": { "id": "http://vocab.getty.edu/aat/300404670", "type": "Type", "_label": "Primary Name" }, "content": "Leda and the Swan" } ], "referred_to_by": [ { "id": "https://clevelandart.org/art/74540/credit-line", "type": "LinguisticObject", "_label": "Credit Line for the Object", "classified_as": { "id": "http://vocab.getty.edu/aat/300418049", "type": "Type", "_label": "brief texts" }, "content": "Bequest of Muriel Butkin" } ], "member_of": { "id": "https://clevelandart.org/art/74540/collection/Drawings", "type": "Set", "_label": "Drawings", "classified_as": { "id": "http://vocab.getty.edu/aat/300025976", "type": "Type", "_label": "collections (object groupings)" } }, "subject_of": { "id": "https://clevelandart.org/art/2015.451", "type": "LinguisticObject", "_label": "Homepage for the Object", "classified_as": { "id": "http://vocab.getty.edu/aat/300266277", "type": "Type", "_label": "home pages" }, "format": "text/html" }, "produced_by": { "id": "https://clevelandart.org/art/74540/production", "type": "Production", "_label": "Production of the Object", "timespan": 
{ "id": "https://clevelandart.org/art/74540/production/timespan", "type": "TimeSpan", "_label": "date unknown" }, "carried_out_by": { "type": "Actor", "_label": "Adolphe Yvon (French, 1817-1893), artist" } }, "current_owner": { "id": "http://vocab.getty.edu/ulan/500300517", "type": "Group", "_label": "Cleveland Museum of Art", "classified_as": { "id": "http://vocab.getty.edu/aat/300312281", "type": "Type", "_label": "museums (institutions)" } } }
from IPython.display import display, HTML, Javascript

# Placeholder element that the visualisation script draws into.
HTML('<div id="example" style="height:100%;width:3000px"/>')

# JavaScript that hands the serialised object to the visualiser.
code2 = 'var data = {}; var selector = "#example"; visjsonld(data, selector); '.format(
    factory.toString(objLA, compact=False)
)

# Load the visualiser source and append the call built above.
with open('src/js/visld2.js', 'r') as _jscript:
    visualiser_source = _jscript.read()
code = visualiser_source + code2

Javascript(code)