#!/usr/bin/env python
# coding: utf-8

# In[1]:


try:
    import IPython
except ImportError:
    # Only a missing package should trigger the notebook-side install;
    # any other failure is left to propagate.
    get_ipython().run_line_magic('pip', 'install IPython')
    import IPython

# `display` and `HTML` are imported from IPython.display only: importing
# them from IPython.core.display is deprecated, and the names are the same.
from IPython.display import display, IFrame, HTML, Javascript

HTML("""""")


# # Collections Data to Linked Art
#
# Cleveland Museum of Art - Simplified
#
# This notebook provides a guided step through the process of transforming Cultural Heritage collections data to Linked Art JSON-LD.

# ## Transform Collections Data to Linked Art
#
# ### Read Input Data
#
# - The first step is to read in the collections data. For this notebook the data has been sourced from the Cleveland Museum of Art GitHub repository.
# - There is a small admin step to remove Byte Order Mark (BOM) characters from the input file. The data are in CSV format.
# - The following code reads the file and displays the first row from the CSV file.
#
# #### Further Reading
# - Cleveland Museum of Art https://www.clevelandart.org
# - Cleveland Museum of Art GitHub repository https://github.com/ClevelandMuseumArt/openaccess
# - Byte Order Mark https://en.wikipedia.org/wiki/Byte_order_mark
#

# In[2]:


try:
    import pandas as pd
except ImportError:
    get_ipython().run_line_magic('pip', 'install pandas')
    import pandas as pd

# Path of the input CSV; later cells re-open this same path.
file = './data/cma/input/data.csv'

# Remove the BOM: decoding with 'utf-8-sig' drops a leading BOM if present,
# and the text is then written back in place as plain UTF-8.
# Context managers make sure both handles are closed (and the write flushed)
# before pandas reads the file.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_free_target:
    bom_free_target.write(s)

# Read only the first data row for a quick preview of the columns.
mpg = pd.read_csv(file, nrows=1, low_memory=False)
mpg.head()


# ### Convert CSV to Python dictionary
#
# The Python `csv` module (including csv.DictReader) is used to read the CSV file and create an object that maps the information in each row to a dict whose keys are fieldnames. An example record in the `dict` object is shown below. You'll note that the field names are now dictionary keys.
#
# ----
# ##### Further reading
#
# Python CSV module https://docs.python.org/3/library/csv.html

# In[3]:


# csv and json are part of the Python standard library, so they are imported
# directly; there is nothing to install for them.
import csv
import json

# newline='' lets the csv module do its own newline handling, so quoted
# fields containing line breaks are parsed correctly.
# The handle is deliberately left open: `allObjects` is a lazy reader and
# the In[5] cell continues iterating it.
csv_file = open(file, mode='r', encoding='utf-8', newline='')
allObjects = csv.DictReader(csv_file)

# Show one example record. This consumes the first row from the reader, so
# any later iteration over `allObjects` resumes at the following row.
for obj in allObjects:
    print(json.dumps(obj, indent=2))
    break


# ### Create field mapping
#
# The next step involves a manual mapping process where entities in the collections data are mapped to their corresponding representation in Linked Art.
#
# This is achieved with a python dictionary `mapp`, where the field names in the input data are mapped to variable names that are used later in a mapping/transformation function.
#
# The `mapp` dictionary is shown below and also shown in table format beneath:
# - keys are the variables used in mapping functions
# - values are the input data field names

# In[4]:


# An empty string as value means "no input field is mapped to this variable".
mapp = {
    "id": "id",
    "accession_number": "accession_number",
    "accession_date": "",
    "classification": "type",
    "title": "title",
    "alt_title": "title_in_original_language",
    "notes": "tombstone",
    "date_created": "creation_date",
    "date_created_earliest": "creation_date_earliest",
    "date_created_latest": "creation_date_latest",
    "created_period": "culture",
    "created_dynasty": "",
    "created_inscriptions": "inscriptions",
    "created_notes": "fun_fact",
    "creator": "creators",
    # NOTE(review): "Medium" is the only capitalised field name here -
    # confirm it matches a column header in the input CSV.
    "physical_medium": "Medium",
    "physical_style": "",
    "physical_technique": "technique",
    "physical_description": "",
    "physical_dimensions": "measurements",
    "created_provenance": "provenance",
    "credit_line": "creditline",
    "collection": "department",
    "current_status": "current_location",
    "current_location": "current_location",
    "homepage": "url"
}


# ### Use the mapping to create an artwork property dictionary
#
# The `mapp` dictionary will now be used, together with the collection data, to create a dictionary containing a key:value pair for an artwork described in the
# collection data, where the key is the variable name used in a transformation function later on, and the value is the collection data property value for the artwork.
#
#
# The process involves:
# - Iteration through the collection data dictionary `mapp`
# - create a dictionary `objProp` with the artwork properties mapped with call to `createObjProp`
#
# An example of the resulting dictionary `objProp` can be viewed in a table below.
#

# In[5]:


def createObjProp(obj, mapp):
    """Translate one input record into the property dictionary used downstream.

    obj  -- one record, keyed by input field name.
    mapp -- mapping of variable name -> input field name.

    Returns a dict keyed by variable name. The "creator" value is wrapped in
    a dict with "name", "id" and "role" keys, and a fixed "current_owner"
    entry is always added.
    """
    # Invert the mapping once: input field name -> variable names using it
    # (several variables may read from the same input field).
    variables_for_field = {}
    for variable, field in mapp.items():
        variables_for_field.setdefault(field, []).append(variable)

    mapped = {}
    for field, raw_value in obj.items():
        for variable in variables_for_field.get(field, ()):
            if variable == "creator":
                mapped[variable] = {"name": raw_value, "id": "", "role": "artist"}
            else:
                mapped[variable] = raw_value

    mapped["current_owner"] = {
        "name": "Cleveland Museum of Art",
        "location": "Cleveland,Ohio",
        "type": "http://vocab.getty.edu/aat/300312281",
        "type_label": "",
    }
    return mapped


# Map only the next unread record from the reader; `obj` stays bound to
# that record after the loop.
objProp = {}
for obj in allObjects:
    objProp = createObjProp(obj, mapp)
    break


# ### Include Code to transform mapped collections data to Linked Art
#
# The following code is usually in a separate file, but shown here for illustration.
#
# The code includes `cromulent` - written by Rob Sanderson - further information at https://github.com/linked-art/crom
#
# There are also functions that take the `objProp` dictionary as input, to transform a description of artworks in the collections data to a representation in Linked Art JSON-LD.
# In[6]:


import calendar

try:
    import cromulent
except ImportError:
    get_ipython().run_line_magic('pip', 'install cromulent')
    import cromulent

from cromulent.model import (
    factory, BeginningOfExistence, EndOfExistence,
    InformationObject, Phase, VisualItem,
)
from cromulent.vocab import (
    Painting, Drawing, Miniature, add_art_setter, PrimaryName, Name,
    AlternateName, CollectionSet, instances, Sculpture,
    aat_culture_mapping, AccessionNumber, Height, Width, SupportPart,
    Gallery, MuseumPlace,
    BottomPart, Description, RightsStatement, MuseumOrg, Purchase,
    Furniture, Mosaic, Photograph, Coin, Vessel, Graphic, Enamel,
    Embroidery, PhotographPrint,
    PhotographAlbum, PhotographBook, PhotographColor, PhotographBW,
    Negative, Map, Clothing,
    Sample, Architecture, Armor, Book, DecArts, Implement, Jewelry,
    Manuscript, SiteInstallation, Text, Print,
    TimeBasedMedia, Page, Folio, Folder, Box, Envelope, Binder, Case,
    FlatfileCabinet,
    HumanMadeObject, Tapestry, LocalNumber,
    Type, Set,
    TimeSpan, Actor, Group, Acquisition, Place, Production,
    LinguisticObject, DigitalObject, DigitalService,
)

# Dictionary of Object Types defined in Linked Art.
# Keys are matched as substrings of the record's classification, in this
# order; values act as templates for the Linked Art class to instantiate.
objTypes = {
    "Painting": Painting(),
    "Sculpture": Sculpture(),
    "Drawing": Drawing(),
    "Miniature": Miniature(),
    "Tapestry": Tapestry(),
    "Furniture": Furniture(),
    "Furnishings": DecArts(),
    "Mosaic": Mosaic(),
    "Photograph": Photograph(),
    "Coin": Coin(),
    "Vessel": Vessel(),
    "Graphic": Graphic(),
    "Enamel": Enamel(),
    "Embroidery": Embroidery(),
    "PhotographPrint": PhotographPrint(),
    "PhotographAlbum": PhotographAlbum(),
    "PhotographBook": PhotographBook(),
    "PhotographColor": PhotographColor(),
    "PhotographBW": PhotographBW(),
    "Negative": Negative(),
    "Map": Map(),
    "Clothing": Clothing(),
    "Sample": Sample(),
    "Architecture": Architecture(),
    "Armor": Armor(),
    "Book": Book(),
    "DecArts": DecArts(),
    "Implement": Implement(),
    "Jewelry": Jewelry(),
    "Manuscript": Manuscript(),
    "SiteInstallation": SiteInstallation(),
    "Text": Text(),
    "Print": Print(),
    "TimeBasedMedia": TimeBasedMedia(),
    "Page": Page(),
    "Folio": Folio(),
    "Folder": Folder(),
    "Box": Box(),
    "Envelope": Envelope(),
    "Binder": Binder(),
    "Case": Case(),
    "FlatfileCabinet": FlatfileCabinet()
}


def createObjProp(obj, docProp):
    """Build a property dictionary from a nested "atom"/"table" record.

    NOTE: once this cell runs, this definition replaces the CSV-oriented
    `createObjProp(obj, mapp)` defined in the In[5] cell.

    obj     -- record with an "atom" list (items carry "@name" and
               optionally "#text") and a "table" list (items carry "@name"
               and "tuple").
    docProp -- mapping of atom name -> property name.

    Returns a dict of mapped properties; "creator" is always a list.
    """
    objProp = {"creator": []}
    for prop in obj["atom"]:
        propName = prop["@name"]
        propValue = prop["#text"] if "#text" in prop else ""
        if propName in docProp:
            objProp[docProp[propName]] = propValue

    for table in obj["table"]:
        # alternative titles
        if table["@name"] == "AltTitles":
            objProp["alt_title"] = table["tuple"]["atom"]["#text"]
        if table["@name"] == "Creator1":
            crerole = creid = crename = ""
            if "atom" in table["tuple"]:
                for atom in table["tuple"]["atom"]:
                    if atom["@name"] == "CreRole":
                        crerole = atom["#text"]
                    if atom["@name"] == "irn":
                        creid = atom["#text"]
                    if atom["@name"] == "SummaryData":
                        crename = atom["#text"]
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # export - confirm the append belongs at this level.
            objProp["creator"].append({"id": creid, "name": crename, "role": crerole})
    return objProp


def objPrimaryname(objProp, object_uri):
    """Return a PrimaryName for the record's "title" (KeyError if absent)."""
    return PrimaryName(object_uri + "/primary-name", value=objProp["title"])


def objAlternatename(objProp, object_uri):
    """Return an AlternateName for a non-empty "alt_title", else None.

    The previous version referenced an un-imported class and returned a
    different variable from the one it assigned, so it could never return
    a name.
    """
    alternatename = None
    if "alt_title" in objProp and objProp["alt_title"] != "":
        alternatename = AlternateName(object_uri + "/alternate-name",
                                      value=objProp["alt_title"])
    return alternatename


def objHomepage(objProp, object_uri):
    """Return a LinguisticObject describing the object's web page, or None.

    Reads objProp["homepage"] (KeyError if absent); an empty string yields
    None.
    """
    homepage = None
    homepageId = objProp["homepage"]
    if homepageId != "":
        homepage = LinguisticObject(homepageId, label="Homepage for the Object")
        # Repeated assignment adds a further classification in cromulent
        # rather than replacing the previous one.
        # The first URI previously read "http://vocab/getty.edu/..." (typo).
        homepage.classified_as = Type("http://vocab.getty.edu/aat/300264578",
                                      label="Web pages (documents)")
        homepage.classified_as = Type("http://vocab.getty.edu/aat/300266277",
                                      label="home pages")
        homepage.format = "text/html"
    return homepage


def objProvenance(objProp, object_uri):
    """Return a provenance statement for a non-empty "created_provenance", else None."""
    prov = None
    if "created_provenance" in objProp:
        provenance = objProp["created_provenance"]
        if provenance != "":
            prov = LinguisticObject(object_uri + "/provenance-statement",
                                    value=provenance,
                                    label="Provenance Statement about the Object")
            prov.classified_as = Type("http://vocab.getty.edu/aat/300055863",
                                      label="provenance (history of ownership)")
            prov.classified_as = Type("http://vocab.getty.edu/aat/300418049",
                                      label="brief texts")
    return prov


def objAccession(objProp, object_uri):
    """Return an AccessionNumber for a non-empty "accession_number", else None."""
    accession = None
    accession_number = objProp["accession_number"]
    if accession_number != "":
        accession = AccessionNumber(accession_number, value=accession_number)
    return accession


def objLocalnumber(objProp, object_uri):
    """Return a LocalNumber for the record's "id" (as text), or None if empty."""
    localnumber = None
    local_id = str(objProp["id"])
    if local_id != "":
        localnumber = LocalNumber(local_id, value=local_id)
    return localnumber


def objCollection(objProp, object_uri):
    """Return a Set representing the record's "collection", or None if absent."""
    coll = None
    if "collection" in objProp:
        collection = objProp["collection"]
        coll = Set(object_uri + "/collection/" + collection, label=collection)
        coll.classified_as = Type("http://vocab.getty.edu/aat/300025976",
                                  label="collections (object groupings)")
    return coll


def objCredit(objProp, object_uri):
    """Return a credit-line statement for a non-empty "credit_line", else None."""
    credit = None
    if "credit_line" in objProp:
        credit_line = objProp["credit_line"]
        if credit_line != "":
            credit = LinguisticObject(object_uri + "/credit-line",
                                      value=credit_line,
                                      label="Credit Line for the Object")
            credit.classified_as = Type("http://vocab.getty.edu/aat/300026687",
                                        label="acknowledgements")
            credit.classified_as = Type("http://vocab.getty.edu/aat/300418049",
                                        label="brief texts")
    return credit


def objProduction(objProp, object_uri):
    """Return the Production of the object, or None without "date_created".

    The production gets a TimeSpan labelled with the creation date (or
    "date unknown"), bounded by the earliest/latest dates when those keys
    exist. When "creator" is a dict with a non-empty "name", an Actor built
    from its "id" and "name" is attached.
    """
    prod = None
    if "date_created" in objProp:
        prod = Production(object_uri + "/production", label="Production of the Object")

        labelTimespan = "date unknown"
        if objProp["date_created"] != "":
            labelTimespan = objProp["date_created"]
        timespan = TimeSpan(object_uri + "/production/timespan", label=labelTimespan)
        if "date_created_earliest" in objProp:
            timespan.begin_of_the_begin = objProp["date_created_earliest"]
        if "date_created_latest" in objProp:
            timespan.end_of_the_end = objProp["date_created_latest"]
        prod.timespan = timespan

        # Only a dict-shaped creator carries usable "id"/"name" entries;
        # a list of creators is not handled here (as before).
        creator = objProp.get("creator")
        if isinstance(creator, dict):
            actor_id = creator.get("id", "")
            actor_label = creator.get("name", "")
            if actor_label != "":
                prod.carried_out_by = Actor(actor_id, actor_label)
    return prod


def objCurrentowner(objProp, object_uri):
    """Return a Group for the current owner (with acquisition, if any), or None."""
    current_owner = None
    if "current_owner" in objProp and objProp["current_owner"]["name"] != "":
        cowner = objProp["current_owner"]["name"]
        # NOTE(review): the ULAN URI is hard-coded whatever the owner name
        # is - confirm it identifies the intended institution.
        current_owner = Group("http://vocab.getty.edu/ulan/500300517", label=cowner)
        current_owner.classified_as = Type("http://vocab.getty.edu/aat/300312281",
                                           label="museums (institutions)")
        acquisition = objAcquisition(objProp, object_uri)
        if acquisition is not None:
            current_owner.acquired_title_through = acquisition
    return current_owner


def objAcquisition(objProp, object_uri):
    """Return the Acquisition for a non-empty "accession_date", else None.

    The owner's location label is read from "name_location" or, failing
    that, from "location" (the key written by the In[5] `createObjProp`).
    """
    acquisition = None
    if "accession_date" in objProp and objProp["accession_date"] != "":
        acquisition = Acquisition(object_uri + "/IMA-acquisition",
                                  label="Acquisition of the Object")
        acquisition.classified_as = Type("http://vocab.getty.edu/aat/300157782",
                                         label="acquisition (collections management)")
        owner = objProp.get("current_owner") or {}
        location_label = owner.get("name_location") or owner.get("location")
        if location_label:
            # NOTE(review): the TGN URI is hard-coded regardless of the
            # label - confirm it matches the owner's location.
            acquisition.took_place_at = Place("http://vocab.getty.edu/tgn/7012924",
                                              label=location_label)
        acquisition.timespan = objAcquisitionTimespan(object_uri, objProp["accession_date"])
    return acquisition


def objAcquisitionTimespan(object_uri, accession_date):
    """Return a TimeSpan covering the accession date.

    Supported shapes: "YYYY", "YYYY-MM" / "YYYY-MM-" and "YYYY-MM-DD".
    Anything else yields a TimeSpan carrying only the label.

    The month branch previously raised TypeError (tuple tested against a
    string), had an always-true `if` on a tuple, and ignored leap years; the
    month length now comes from `calendar.monthrange`.
    """
    accession_date = str(accession_date)
    midnight = "T00:00:00.000Z"
    begin = end = ""

    if len(accession_date) == 4:
        begin = accession_date + "-01-01" + midnight
        end = accession_date + "-12-31" + midnight
    elif len(accession_date) in (7, 8):
        year_month = accession_date[:7]
        try:
            year_text, month_text = year_month.split("-")
            last_day = calendar.monthrange(int(year_text), int(month_text))[1]
        except ValueError:
            # Not a year-month value (or month out of range): no bounds.
            pass
        else:
            begin = year_month + "-01" + midnight
            end = year_month + "-" + "{:02d}".format(last_day) + midnight
    elif len(accession_date) == 10:
        begin = accession_date + midnight
        end = accession_date + midnight

    timespan = TimeSpan(object_uri + "/IMA-acquisition/timespan", label=accession_date)
    if begin != "":
        timespan.begin_of_the_begin = begin
    if end != "":
        timespan.end_of_the_end = end
    return timespan


def objCustody(objProp, object_uri):
    """Return a Group for the custodian when the status does not imply ownership.

    Returns None when "current_status" is missing/empty or when
    `checkCurrentOwner` reports the status as an ownership status.
    """
    custody = None
    if "current_status" in objProp and objProp["current_status"] != "":
        if not checkCurrentOwner(objProp["current_status"]):
            owner = objProp["current_owner"]
            custody = Group(label=owner["name"])
            custody.classified_as = Type(owner["type"], label=owner["type_label"])
    return custody


def objDigitalObject(objProp, object_uri):
    """Return a VisualItem shown by a DigitalObject at objProp["image_url"].

    A DigitalService conforming to the IIIF image API is attached when the
    URL contains "iiif" (case-insensitive).
    """
    url = objProp["image_url"]
    vi = VisualItem(url, label="Digital image of artwork")
    do = DigitalObject(url, label="Image of artwork")
    do.classified_as = Type("http://vocab.getty.edu/aat/300215302", "Digital Image")
    do.access_point = DigitalObject(url)
    if "iiif" in url.lower():
        ds = DigitalService()
        ds.access_point = DigitalObject(url)
        ds.conforms_to = InformationObject("http://iiif.io/api/image")
        do.digitally_available_via = ds
    vi.digitally_shown_by = do
    return vi


def createObjDesc(objProp, objTypes, object_uri):
    """Assemble the Linked Art description of one artwork.

    objProp    -- mapped artwork properties.
    objTypes   -- mapping of type name -> Linked Art class (or an instance
                  of it); the first key found inside
                  objProp["classification"] selects the class.
    object_uri -- URI assigned to the resulting object.

    Returns a new Linked Art object (HumanMadeObject when no type matches).
    """
    objLA = HumanMadeObject()  # linked art object
    for otype in objTypes:
        if otype in objProp["classification"]:
            # Create a fresh object instead of mutating the shared template
            # held in objTypes, so repeated calls do not leak state into
            # each other.
            template = objTypes[otype]
            objLA = (template if isinstance(template, type) else type(template))()
            break
    objLA.id = object_uri
    objLA._label = objProp["title"]

    # DIGITAL OBJECT
    if "image_url" in objProp and objProp["image_url"] != "":
        dig = objDigitalObject(objProp, object_uri)
        if dig is not None:
            objLA.representation = []
            objLA.representation.append(dig)

    # IDENTIFIED_BY
    identifiers = [
        ident
        for ident in (
            objAccession(objProp, object_uri),
            objLocalnumber(objProp, object_uri),
            objPrimaryname(objProp, object_uri),
        )
        if ident is not None
    ]
    if identifiers:
        objLA.identified_by = []
        for ident in identifiers:
            objLA.identified_by.append(ident)

    # REFERRED_TO_BY
    objLA.referred_to_by = None
    prov = objProvenance(objProp, object_uri)
    credit = objCredit(objProp, object_uri)
    if prov is not None or credit is not None:
        objLA.referred_to_by = []
        if prov is not None:
            objLA.referred_to_by.append(prov)  # provenance
        if credit is not None:
            objLA.referred_to_by.append(credit)  # credit line

    # SUBJECT_OF
    objLA.subject_of = None
    homepage = objHomepage(objProp, object_uri)
    if homepage is not None:
        objLA.subject_of = homepage  # home page

    # PRODUCED_BY
    objLA.produced_by = None
    if "creator" in objProp:
        prod = objProduction(objProp, object_uri)
        if prod is not None:
            objLA.produced_by = prod  # production

    # MEMBER_OF
    objLA.member_of = None
    if "collection" in objProp:
        coll = objCollection(objProp, object_uri)
        if coll is not None:
            objLA.member_of = coll  # collection

    # CURRENT_KEEPER
    # NOTE(review): the custodian is stored on `current_owner`, the same
    # property used for the owner below - confirm this is intended.
    objLA.current_owner = None
    custody = objCustody(objProp, object_uri)
    if custody is not None:
        objLA.current_owner = custody

    # CURRENT_OWNER
    if "current_owner" in objProp and objProp["current_owner"] != "":
        current_owner = objCurrentowner(objProp, object_uri)
        if current_owner is not None:
            objLA.current_owner = current_owner

    return objLA


def checkCurrentOwner(current_status):
    """Return True when the status text marks the object as owned.

    True for exactly 'Accessioned' or 'Partial Accession', or when the text
    contains 'IMA-Owned'; False otherwise (including the empty string).
    """
    if current_status == "":
        return False
    if current_status in ('Accessioned', 'Partial Accession'):
        return True
    return 'IMA-Owned' in current_status


# ### Generate Linked Art JSON-LD and write to file
#
# Making use of the code above, the final part of the transformation process takes the `objProp` dictionary with the mapped artwork properties, and returns Linked Art JSON-LD, that is then saved to a file.

# In[7]:


# baseURI for JSON-LD document
baseURI = "https://clevelandart.org/art/"

# `obj` is the record mapped in the In[5] cell.
object_id = obj[mapp.get("id")]
object_uri = baseURI + object_id
objLA = createObjDesc(objProp, objTypes, object_uri)

# write to file (the context manager closes the handle even on error)
with open(object_id + ".json", "wt", encoding="utf-8") as text_file:
    text_file.write(factory.toString(objLA, compact=False))


# ### Linked Art
#
# An example of the Linked Art JSON-LD representation of an artwork is shown below.
#
# To more easily view the representation, you may like to copy and paste the text below into the JSON-LD Playground website that offers a visualisation option - visit https://json-ld.org/playground/

# In[8]:


print(factory.toString(objLA, compact=False))


# In[10]:


# NOTE(review): the markup of this literal was lost in the export (the
# string was left broken across lines). A container with id "example" is
# what the selector in the next cell targets - confirm against the original
# notebook.
HTML('<div id="example"></div>')


# In[12]:


from IPython.display import display, HTML, Javascript

code2 = 'var data = ' + factory.toString(objLA, compact=False) + '; var selector = "#example"; visjsonld(data, selector); '

with open('src/js/visld2.js', 'r') as _jscript:
    code = _jscript.read() + code2

Javascript(code)


# In[ ]: