#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Install IPython on demand. Only a failed import should trigger the install,
# so catch ImportError rather than every exception.
try:
    import IPython
except ImportError:
    # NOTE(review): get_ipython() is not defined in this file; presumably it is
    # injected by the Jupyter/IPython runtime - confirm.
    get_ipython().run_line_magic('pip', 'install IPython')
    import IPython

from IPython.display import display, IFrame, HTML, Javascript
from IPython.core.display import HTML

# NOTE(review): the HTML payload is empty; presumably the original markup was
# lost when the notebook was exported - confirm.
HTML("""""")


# # Transforming Collection Data to Linked Art
#
# Indianapolis Museum of Art
#
# This notebook provides a step-by-step walk through the process of transforming collections data to Linked Art.
#
# - The notebook uses collections data from the [Indianapolis Museum of Art (IMA)](https://discovernewfields.org/).
# - You will need to download the IMA collections data file when prompted.
# - The coded transformation is based on the transformation encoded in the IMA's XSLT file, available in the [IMA GitHub repository](https://github.com/IMAmuseum/LinkedArt).
# - The Linked Art data model documentation included in the notebook has been sourced from the [Linked Art website](https://linked.art).
#
#
# ##### Further Reading
#
# - [Indianapolis Museum of Art (IMA)](https://discovernewfields.org/)
# - [IMA GitHub repository](https://github.com/IMAmuseum/LinkedArt)
# - [Linked Art website](https://linked.art)
#
#
# ## Method
#
# - Upload collections data file
# - Create dropdown of artworks to transform
# - Select an artwork from dropdown, to transform to Linked Art
# - Map collection data to Linked Art data model
# - Transform artwork description to Linked Art
# - Visualise Linked Art JSON-LD representation of artwork
#
# ## Load Collections Data File
#
# The Indianapolis Museum of Art (IMA) has transformed a sample of its collections data to Linked Art. The data originated from the IMA's [EMu Collections Management Systems](https://www.axiell.com/solutions/product/emu/) in XML format.
#
# - The [XML data files](https://github.com/IMAmuseum/LinkedArt/blob/master/XML) are available via
#   - [IMA's LinkedArt GitHub repository](https://github.com/IMAmuseum/LinkedArt),
#   - including the [Objects Sample XML file](https://github.com/IMAmuseum/LinkedArt/blob/master/XML/ObjectsSample.xml) ([raw file](https://raw.githubusercontent.com/IMAmuseum/LinkedArt/master/XML/ObjectsSample.xml)).
#
#
# ----
# ##### Further reading
# - IMA's [EMu Collections Management Systems](https://www.axiell.com/solutions/product/emu/)
# - [XML data files](https://github.com/IMAmuseum/LinkedArt/blob/master/XML)
# - [IMA's LinkedArt GitHub repository](https://github.com/IMAmuseum/LinkedArt)
# - [Objects Sample XML file](https://github.com/IMAmuseum/LinkedArt/blob/master/XML/ObjectsSample.xml) ([Objects Sample XML - raw file](https://raw.githubusercontent.com/IMAmuseum/LinkedArt/master/XML/ObjectsSample.xml))
#
# The following code, which has been commented out, allows you to upload an XML file of your choice.
#

# In[2]:


# Install ipywidgets on demand; only a failed import should trigger the install.
try:
    import ipywidgets
except ImportError:
    get_ipython().system('pip install ipywidgets')
    import ipywidgets

from ipywidgets import Layout, FileUpload

# Disabled upload widget, kept as an inert string literal.
"""
# define file upload widget
upload = FileUpload(accept='.xml', multiple=False, description='Select XML file')
upload
"""
print("")


# In[3]:


# Install xmltodict on demand; only a failed import should trigger the install.
try:
    import xmltodict
except ImportError:
    get_ipython().system('pip install xmltodict')
    import xmltodict

# Disabled handling of the uploaded file, kept as an inert string literal.
"""
obj = False
# get content from uploaded file
for uploaded_filename in upload.value:
    content = upload.value[uploaded_filename]['content']
    obj = xmltodict.parse(content)

if obj == False:
    display(HTML("Please select a file to transform"))
else:
    display(HTML("File uploaded"))
    content = upload.value[uploaded_filename]['content']
    obj = xmltodict.parse(content)
"""
print("")


# In[4]:


# Parse the sample XML export into nested dictionaries.
with open('data/ima/input/ObjectsSample.xml') as fd:
    content = fd.read()
    obj = xmltodict.parse(content)

# Each entry under table/tuple is one catalogue record.
allObjects = obj["table"]["tuple"]

#object dropdown options
objOptions = []
# The first option is a placeholder whose value is the empty string.
objOptions.append(("Please select an object",''))

# NOTE(review): this loop rebinds the module-level name `obj` to each record in
# turn; later cells may rely on that binding - confirm before renaming.
for obj in allObjects:
    title = irn = ""
    # define properties variables
    for prop in obj["atom"]:
        propName = prop["@name"]
        # Atoms without a "#text" entry are skipped.
        if "#text" in prop:
            if propName == "irn":
                irn = prop["#text"]
            if propName == "TitMainTitle":
                title = prop["#text"]
    # Option label is "<irn> : <title>"; option value is the irn.
    objOptions.append((irn + ' : ' + title,irn))

selectObject = ipywidgets.Dropdown(options=objOptions, description='Select Object')


# ## Select an Artwork to Transform to Linked Art
# The next step will create a dropdown list of artworks from the XML file.
#
#
# Select an artwork from the dropdown list to transform the catalogue data to Linked Art JSON-LD.
# In[6]:


# Show the dropdown and report the current selection.
display(selectObject)

selectedIRN = selectObject.value
# An empty value means no artwork has been chosen yet.
if selectedIRN == "":
    display(HTML("Please select an artwork to transform"))
else:
    # Each option is a (label, value) pair; show the label of the selected one.
    # NOTE(review): the original statement was truncated after a trailing
    # `+ "`; presumably closing markup was lost in export - confirm.
    display(HTML("Artwork selected : " + selectObject.options[selectObject.index][0]))
# ### Identifiers
#
# This section creates representations of different types of identifiers for the artwork, e.g. accession number.
#
# ----
#
# ##### Linked Art data model
#
# Many resources of interest are given external identifiers, such as accession numbers for objects, ORCIDs for people or groups, lot numbers for auctions, and so forth. Identifiers are represented in a very similar way to names, but instead use the Identifier class. Identifiers will normally have a classification determining which sort of identifier it is, to distinguish between internal repository system assigned numbers from museum assigned accession numbers, for example.
#
# As Identifiers and Names use the same `identified_by` property, the JSON will frequently have mixed classes in the array. Unlike `Names`, `Identifiers` are not part of human language and thus cannot have translations or a language associated with them.
#
# -----
# ##### IMA collection data mapping
#
# - `accession_number` is the IMA data accession number
# - `owner assigned number` is the IMA data id
#
# ----
# ##### Further reading
#
# https://linked.art/model/base/#identifiers
#
# ### Accession Number
#
# This section represents the artwork's accession number.
#
# An accession number is a sequential number assigned to each record or item as it is added to a library collection or database and which indicates the chronological order of its acquisition (src: https://libanswers.liverpool.ac.uk/faq/181287).
#
#
#
# ----
# ##### Further reading
#
# https://linked.art/model/base/#identifiers

# In[13]:


import json


def objAccession(objProp,object_uri):
    """Build the accession-number identifier for an artwork.

    Reads ``objProp["accession_number"]`` and wraps it in an
    ``AccessionNumber`` whose identifier and value are both that number.
    ``object_uri`` is accepted for signature parity with the other builders
    and is not used.

    Returns the ``AccessionNumber``, or ``None`` when the property is
    missing, empty or ``None``.
    """
    accession_number = objProp.get("accession_number", "")
    if accession_number in ("", None):
        return None
    return AccessionNumber(accession_number,value=accession_number)


_accession_obj = objAccession(objProp,object_uri)
# Only serialise a real identifier. Fall back to JSON null so the string
# concatenation in the visualisation cell below still works.
if _accession_obj is not None:
    data = factory.toString(_accession_obj, compact=False)
else:
    data = "null"
print(data)


# In[14]:


# Append the JSON-LD and a call to visjsonld() to the visualisation script.
with open('src/js/visld2.js', 'r') as _jscript:
    code = _jscript.read() + 'var data = ' + data + '; var selector = "#vis-an"; visjsonld(data, selector); '
Javascript(code)


#
# ### Local Number
#
# This section represents an artwork's local number.
#
# A local number is an organisation-assigned number for an artwork that has a local scope, i.e. that is not global in scope and is not intended to have meaning beyond the organisation.
#
# ----
# ##### Further reading
#
# https://linked.art/model/base/#identifiers

# In[15]:


def objLocalnumber(objProp,object_uri):
    """Build the local-number identifier for an artwork.

    Reads ``objProp["id"]``, converts it to ``str`` and wraps it in a
    ``LocalNumber`` whose identifier and value are both that string.
    ``object_uri`` is accepted for signature parity with the other builders
    and is not used.

    Returns the ``LocalNumber``, or ``None`` when the id is missing or empty.
    """
    # Named local_id so the builtin id() is not shadowed.
    local_id = str(objProp.get("id", ""))
    if local_id == "":
        return None
    return LocalNumber(local_id,value=local_id)


_localnumber_obj = objLocalnumber(objProp,object_uri)
# Only serialise a real identifier. Fall back to JSON null so the string
# concatenation in the visualisation cell below still works.
if _localnumber_obj is not None:
    data = factory.toString(_localnumber_obj, compact=False)
else:
    data = "null"
print(data)


# In[16]:


# Append the JSON-LD and a call to visjsonld() to the visualisation script.
with open('src/js/visld2.js', 'r') as _jscript:
    code = _jscript.read() + 'var data = ' + data + '; var selector = "#vis-ln"; visjsonld(data, selector); '
Javascript(code)


#
# In[ ]:


# ### Names
#
# This section is concerned with names for the artwork.
#
# ----
# ##### Linked Art data model
#
# As the `_label` property is intended as internal documentation for the data, it is strongly recommended that every resource that should be rendered to an end user also have at least one specific name. The name could be for an object, a person, a group, an event or anything else. This pattern uses the `identified_by` property, with a `Name` resource. The value of the name is given in the content property of the `Name`.
#
# It is somewhat unintuitive to think of a name as identifying the resource it is associated with, as names are typically not unique. However, as the name itself is uniquely identified rather than just an anonymous string, they are no longer a shared label and instead the particular instance of a name is uniquely associated with the resource. With this formulation, the name instance does uniquely identify the resource.
#
# If there is more than one name given, then there should be one that is `classified_as` the primary name for use. This is done by adding the `Primary Name` (aat:300404670) term to it. There should be exactly one primary title given per language.
#
# Names are also part of human communication, and can have the Linguistic features of the model associated with them, such as having a particular language, or having translations.
#
# ---
# ##### Further reading
#
# https://linked.art/model/base/#names
#
# #### Alternate Name

# In[17]:


def objAlternatename(objProp,object_uri):
    """Build the alternate name for an artwork.

    Reads ``objProp["alt_title"]`` and wraps it in an ``AlternateName``
    identified by ``object_uri + "/alternate-name"``.

    Returns the ``AlternateName``, or ``None`` when the property is missing
    or empty.
    """
    alt_title = objProp.get("alt_title", "")
    # Empty titles are skipped, matching how the other builders treat "".
    if alt_title in ("", None):
        return None
    # The original stored this in `alternatename` but returned `alternateName`,
    # so the function always returned None. Return the object directly.
    return AlternateName(object_uri + "/alternate-name",value=alt_title)


altname = objAlternatename(objProp,object_uri)
if altname is not None:
    print(factory.toString(altname, compact=False))


# ### Home Page
#
# This section is concerned with a home page that describes the artwork.
#
# ----
# ##### Linked Art data model
#
# A very common scenario is that there is a web page about the object, perhaps managed by a collections management system. For humans, this page is much more useful than the data intended for machines. It can be referenced with the `subject_of` property, and points to a `DigitalObject` which is `classified_as` a web page, or `aat:300264578`. As with digital images, the home page can have a format of "text/html" and other properties.
#
# ----
# ##### IMA data mapping
# - A URL is constructed using a URL prefix `http://collection.imamuseum.org/artwork/` and the artwork's `id` property
#
# ----
#
# ##### Further information
#
# https://linked.art/model/digital/#home-page

# In[18]:


def objHomepage(objProp,object_uri):
    """Build the home-page resource for an artwork.

    The page URL is ``http://collection.imamuseum.org/artwork/`` followed by
    ``str(objProp["id"])``. ``object_uri`` is accepted for signature parity
    with the other builders and is not used.

    Returns a ``LinguisticObject`` classified with two AAT types and with
    format ``text/html``. Raises ``KeyError`` when ``objProp`` has no ``id``.
    """
    # NOTE(review): the model text above describes a DigitalObject, while this
    # code builds a LinguisticObject - confirm which class is intended.
    page_id = str(objProp["id"])
    homepageId = "http://collection.imamuseum.org/artwork/" + page_id

    homepage = LinguisticObject(homepageId, label="Homepage for the Object")
    # The host was misspelt as "vocab/getty.edu", which is not a valid AAT URI.
    homepage.classified_as = Type("http://vocab.getty.edu/aat/300264578", label="Web pages (documents)")
    homepage.classified_as = Type("http://vocab.getty.edu/aat/300266277", label="home pages")
    homepage.format = "text/html"
    return homepage


# In[19]:


homepage = objHomepage(objProp,object_uri)
if homepage is not None:
    print(factory.toString(homepage, compact=False))


# ### Statements about a Resource - Linguistic Objects
#
# This section is concerned with the representation of assertions about the artwork.
#
# -----
# ##### Linked Art data model
#
# In many cases, current data does not support the level of specificity that the full ontology allows, or the information is simply best expressed in human-readable form. For example, instead of a completely modeled set of parts with materials, many museum collection management systems allow only a single human-readable string for the "medium" or "materials statement". The same is true in many other situations, including rights or allowable usage statements, dimensions, edition statements and so forth.
# Any time that there is a description of the resource, with or without qualification as to the type of description, then this pattern can be used to record the descriptive text.
#
# The pattern makes use of the `LinguisticObject` class that is used to identify a particular piece of textual content. These Linguistic Objects are then referred to by any other resource. They maintain the statement's text in the content property, and the language of the statement (if known) in the language property.
#
# Use cases for this pattern include:
#
# - General description of the resource
# - Materials statement for an object
# - Attribution statement for an image
# - Biography for a person
# - Dimensions statement for a part of an object
#
# -----
# ##### IMA data mapping
# - The IMA data field `created_provenance` is used to construct a provenance statement about the artwork
# - The IMA data field `credit_line` is used to construct a credit line statement about the artwork
#
# ----
#
# ##### Further reading
#
# https://linked.art/model/base/#statements-about-a-resource
#
# #### Provenance
#
# This code creates a provenance statement about the artwork.

# In[20]:


def objProvenance(objProp,object_uri):
    """Build the provenance statement for an artwork.

    Uses ``objProp["created_provenance"]`` as the statement text and
    ``object_uri + "/provenance-statement"`` as its identifier.

    Returns a ``LinguisticObject`` carrying two AAT classifications, or
    ``None`` when the property is missing or empty.
    """
    if "created_provenance" not in objProp:
        return None

    statement_text = objProp["created_provenance"]
    if statement_text == "":
        return None

    statement = LinguisticObject(
        object_uri + "/provenance-statement",
        value=statement_text,
        label="Provenance Statement about the Object",
    )
    # Classifications are assigned one after the other, in this order.
    classifications = (
        ("http://vocab.getty.edu/aat/300055863", "provenance (history of ownership)"),
        ("http://vocab.getty.edu/aat/300418049", "brief texts"),
    )
    for type_uri, type_label in classifications:
        statement.classified_as = Type(type_uri, label=type_label)
    return statement


prov = objProvenance(objProp,object_uri)
if prov is not None:
    print(factory.toString(prov, compact=False))


# #### Credit Line
#
# This code creates a credit line statement for the artwork.
# In[21]:


def objCredit(objProp,object_uri):
    """Build the credit-line statement for an artwork.

    Uses ``objProp["credit_line"]`` as the statement text and
    ``object_uri + "/credit-line"`` as its identifier.

    Returns a ``LinguisticObject`` carrying two AAT classifications, or
    ``None`` when the property is missing or empty.
    """
    propCredit = "credit_line"
    if propCredit not in objProp:
        return None

    credit_text = objProp[propCredit]
    if credit_text == "":
        return None

    statement = LinguisticObject(
        object_uri + "/credit-line",
        value=credit_text,
        label="Credit Line for the Object",
    )
    # Classifications are assigned one after the other, in this order.
    classifications = (
        ("http://vocab.getty.edu/aat/300026687", "acknowledgements"),
        ("http://vocab.getty.edu/aat/300418049", "brief texts"),
    )
    for type_uri, type_label in classifications:
        statement.classified_as = Type(type_uri, label=type_label)
    return statement


# In[22]:


credit = objCredit(objProp,object_uri)
if credit is None:
    print("No credit description")
else:
    print(factory.toString(credit, compact=False))


# ### Production
#
# This section is concerned with a representation in Linked Art of the production of the artwork.
#
# ----
#
# ##### Linked Art data model
#
# The first activity in an object's lifecycle is its creation, or `Production`. The relationship to the object that was produced by the activity (`produced`) is added to the general activity model, along with the time, location and actors. This follows the base pattern for activities.
#
#
# ----
# ##### IMA data mapping
#
# - The IMA data fields are used in the production event representation
#   - `date_created`
#   - `date_created_earliest`
#   - `date_created_latest`
# - If a value exists for the `creator` field in the `mapp` dictionary, this is used for the representation of the creator role in the production event.
#
# ----
# ##### Further reading
#
# https://linked.art/model/object/production/
#

# In[23]:


def objProduction(objProp,object_uri):
    """Build the production event for an artwork.

    A ``Production`` is created only when ``objProp`` has a ``date_created``
    key. Its ``TimeSpan`` is labelled with that date, or ``"date unknown"``
    when the value is empty, and takes its bounds from
    ``date_created_earliest`` and ``date_created_latest`` when those keys are
    present. Each entry of ``objProp["creator"]`` that has a non-empty
    ``name`` is attached as an ``Actor`` through ``carried_out_by``.

    Returns the ``Production``, or ``None`` when there is no ``date_created``.
    """
    date_created = "date_created"
    created_earliest = "date_created_earliest"
    created_latest = "date_created_latest"

    if date_created not in objProp:
        return None

    prod = Production(object_uri + "/production", label="Production of the Object")

    labelTimespan = "date unknown"
    if objProp[date_created] != "":
        labelTimespan = objProp[date_created]

    timespan = TimeSpan(object_uri + "/production/timespan", label=labelTimespan)
    if created_earliest in objProp:
        timespan.begin_of_the_begin = objProp[created_earliest]
    if created_latest in objProp:
        timespan.end_of_the_end = objProp[created_latest]
    prod.timespan = timespan

    for creator in objProp.get("creator", []):
        # Entries that are not mappings carry no id/name and are skipped.
        if not isinstance(creator, dict):
            continue
        # Read id and name per creator. The original initialised them once
        # before the loop, so a creator without a name inherited the previous
        # creator's values and that actor was attached a second time.
        actor_id = creator.get("id", "")
        actor_label = creator.get("name", "")
        if actor_label != "":
            prod.carried_out_by = Actor(actor_id, actor_label)

    return prod


# In[24]:


prod = objProduction(objProp,object_uri)
if prod is not None:
    print(factory.toString(prod, compact=False))


# ### Current Owner and Acquisition
#
# This section is concerned with a representation of the current owner of the artwork and also with an acquisition event.
#
#
# ----
# ##### Linked Art data model
#
# Acquisitions are used to describe the transfer of ownership of an object from one owner to the next. The first owner is typically the artist, who would then transfer it to the second owner, to the third owner and so on. The ownership chain can be expressed by repeating this same pattern with the buyer from one acquisition being the seller in the subsequent one. If the previous owner (e.g. the seller if there is a value exchange) or the subsequent owner (e.g. the buyer) is not known for a particular acquisition, then the reference can be left out from the description.
#
# The acquisition is not necessarily a purchase, it could be a gift, an inheritance or any other method of gaining the right of ownership of an object.
#
# The model encodes this information with an Acquisition part of the overall Provenance Event. The acquisition is the transfer of the right of ownership of an object (referenced in transferred_title_of) from the seller (in transferred_title_from) to the buyer (in transferred_title_to).
#
# Each object has its own Acquisition as part of the provenance event, so if a collector buys three paintings from a dealer, then there would be a single Provenance Event with three Acquisitions, all of which transfer the title of a single painting from the dealer to the collector.
#
# -----
#
# ##### IMA data mapping
# - The following IMA data fields are used in the representation:
#   - `accession_date`
#   - `current_owner`
#     - `name`, `type`, `type_label`, `name_location`
#
# ----
# ##### Further reading
#
# https://linked.art/model/provenance/acquisition/#object-acquisition
#
#

# In[25]:


def objCurrentowner(objProp,object_uri):
    """Build the current-owner group for an artwork.

    Reads ``name`` and ``type`` from ``objProp["current_owner"]`` and
    attaches the acquisition built by ``objAcquisition`` when there is one.

    Returns a ``Group``, or ``None`` when ``current_owner`` is missing, is
    not a mapping, or has an empty ``name``.
    """
    cowner = objProp.get("current_owner")
    # The full-record builder compares current_owner with "", so a plain
    # string value is possible here; treat it as "no owner".
    if not isinstance(cowner, dict) or cowner.get("name", "") == "":
        return None

    # NOTE(review): the ULAN identifier and the classification label are
    # hard-coded rather than taken from the data - confirm this is intended.
    current_owner = Group("http://vocab.getty.edu/ulan/500300517", label=cowner["name"])
    current_owner.classified_as = Type(cowner["type"], label="museums (institutions)")

    acquisition = objAcquisition(objProp,object_uri)
    if acquisition is not None:
        current_owner.acquired_title_through = acquisition
    return current_owner


def objAcquisition(objProp,object_uri):
    """Build the acquisition event for an artwork.

    Requires a non-empty ``objProp["accession_date"]``. The place is attached
    only when ``current_owner`` has a ``name_location`` entry.

    Returns an ``Acquisition`` with a time span from
    ``objAcquisitionTimespan``, or ``None`` when there is no accession date.
    """
    accession_date = objProp.get("accession_date", "")
    if not accession_date:
        return None

    acquisition = Acquisition(object_uri + "/IMA-acquisition", label = "Acquisition of the Object")
    acquisition.classified_as = Type("http://vocab.getty.edu/aat/300157782", label="acquisition (collections management)")

    owner = objProp.get("current_owner")
    # The original tested for "name_location" but then read ["location"],
    # which raised KeyError. Read the key that was tested.
    if isinstance(owner, dict) and "name_location" in owner:
        acquisition.took_place_at = Place("http://vocab.getty.edu/tgn/7012924", label=owner["name_location"])

    acquisition.timespan = objAcquisitionTimespan(object_uri,accession_date)
    return acquisition


def _accessionDateBounds(accession_date):
    """Return ``(begin, end)`` timestamps covering ``accession_date``.

    Supported shapes are ``YYYY``, ``YYYY-MM`` (with or without a trailing
    hyphen) and ``YYYY-MM-DD``. Anything else gives ``("", "")``.
    """
    import calendar

    midnight = "T00:00:00.000Z"
    length = len(accession_date)

    if length == 4:
        return accession_date + "-01-01" + midnight, accession_date + "-12-31" + midnight

    if length == 10:
        return accession_date + midnight, accession_date + midnight

    if length in (7, 8):
        try:
            year_text, month_text = accession_date.rstrip("-").split("-")
            year, month = int(year_text), int(month_text)
            # monthrange gives the real last day, leap years included.
            last_day = calendar.monthrange(year, month)[1]
        except ValueError:
            # Not a year-month value after all.
            return "", ""
        prefix = "{:04d}-{:02d}-".format(year, month)
        return prefix + "01" + midnight, prefix + "{:02d}".format(last_day) + midnight

    return "", ""


def objAcquisitionTimespan(object_uri,accession_date):
    """Build the time span of the acquisition.

    The span is labelled with ``accession_date`` as given. Its bounds are set
    only when the date has one of the shapes understood by
    ``_accessionDateBounds``.
    """
    begin, end = _accessionDateBounds(accession_date)

    timespan = TimeSpan(object_uri + "/IMA-acquisition/timespan", label=accession_date)
    if begin != "":
        timespan.begin_of_the_begin = begin
    if end != "":
        timespan.end_of_the_end = end
    return timespan


# In[26]:


current_owner = objCurrentowner(objProp,object_uri)
if current_owner is not None:
    print(factory.toString(current_owner, compact=False))


# ## Custody
# This section is concerned with a representation of the custody of an artwork, i.e. the entity responsible for looking after the artwork.
#
#
# -----
# #### Linked Art data model
#
# Objects are owned by legal entities, such as museum organizations or individual people. However there may be more information about which department is responsible within a museum for the curation of the object. This is the division between acquisitions (the legal ownership of the object) and custody (the responsibility for looking after the object).
# If the department is known, then it should be either part of the Provenance Event in which the object is acquired, or a separate provenance event if the object was not accessioned by a department and later came under their care, or was transferred between departments. In these latter cases, the ownership does not change, only the custody of the object.
#
# The department becomes the `current_keeper` of the object, whereas the institution is the `current_owner`.
#
# ----
#
# #### IMA data mapping
# IMA data fields used in the representation:
# - `current_status`
# - `current_owner`
#
#
# ----
# ##### Further reading
#
# - https://linked.art/model/provenance/custody/#institutional-ownership-departmental-custody
#

# In[27]:


def objCustody(objProp,object_uri):
    """Build the custody group for an artwork.

    A ``Group`` is built from ``objProp["current_owner"]`` (``name``,
    ``type``, ``type_label``) only when ``current_status`` is present,
    non-empty, and ``checkCurrentOwner`` reports it as not owned.

    Returns the ``Group``, or ``None`` otherwise.
    """
    if "current_status" not in objProp or objProp["current_status"] == "":
        return None

    if checkCurrentOwner(objProp["current_status"]):
        return None

    owner_record = objProp["current_owner"]
    keeper_name = owner_record["name"]
    keeper_type = owner_record["type"]
    keeper_type_label = owner_record["type_label"]

    custody = Group(label=keeper_name)
    custody.classified_as = Type(keeper_type, label=keeper_type_label)
    return custody


def checkCurrentOwner(current_status):
    """Report whether ``current_status`` denotes an owned object.

    True when the status equals ``Accessioned`` or ``Partial Accession``, or
    contains ``IMA-Owned``. An empty status gives False.
    """
    if current_status == "":
        return False

    owned_statuses = ('Accessioned','Partial Accession')
    exact_match = current_status in owned_statuses
    # Evaluated unconditionally, as the status is expected to support `in`.
    marked_owned = "IMA-Owned" in current_status
    return exact_match or marked_owned


custody = objCustody(objProp,object_uri)
if custody is not None:
    print(factory.toString(custody, compact=False))


# ### Membership of Collections and Sets
#
# This section is concerned with the representation of collections or sets that the artwork may belong to.
#
# ----
# #### Linked Art data model
#
# There are many use cases for grouping resources together, often of the same class but sometimes of varying types.
# These use cases are exemplified in the sections below, and range from the set of objects in an auction lot, to dealer inventories and museum collections, exhibitions, a set of related concepts, or the set of people that share a common feature such as gender or nationality.
#
# In order to cover all of the use cases with a consistent pattern, we introduce a new `Set` class from outside of CIDOC-CRM. This avoids issues with sets of resources with different types, and the semantics of the identity of objects and collections. If an equivalent class is added into the core CIDOC-CRM ontology in the future, a new major version of the specification will change to using it.
#
# ##### Sets
#
# Sets are conceptual groupings, rather than physical ones. The set of objects in a virtual exhibition or simply the set of a person's favorite objects never change their physical state by being part of the Set or not. They are, thus, created by a `Creation`, not by a `Production`.
#
# Like any core resource, Sets must have an id and type, are likely to have additional classifications, and can have `Identifiers` and `Names`. They can have statements made about them, and have member resources. These member resources are included via the `member` property rather than `part`, or via `member_of` from the included resource to the `Set`.
#
# -----
#
# ##### IMA data mapping
# The `collection` property in the IMA data is used.
#
#
#
# -----
#
# ##### Further reading
# https://linked.art/model/collection/
#
#

# In[28]:


def objCollection(objProp,object_uri):
    """Build the collection set that an artwork belongs to.

    Uses ``objProp["collection"]`` both as the label and as the last segment
    of the identifier ``object_uri + "/collection/" + collection``.

    Returns a ``Set`` classified as an AAT collection, or ``None`` when
    ``objProp`` has no ``collection`` key.
    """
    if "collection" not in objProp:
        return None

    collection = objProp["collection"]
    coll = Set(object_uri +"/collection/" + collection, label= collection)
    coll.classified_as = Type("http://vocab.getty.edu/aat/300025976", label="collections (object groupings)")
    return coll


coll = objCollection(objProp,object_uri)
if coll is not None:
    print(factory.toString(coll, compact=False))


# ## Create the full Linked Art JSON-LD representation
#
# Having looked at different representations in the Linked Art data model, this section now brings these together in one Linked Art JSON-LD document.
#
# The resulting document can be copy+pasted, and visualised in the JSON-LD playground. Alternatively run the code below to visualise the Linked Art JSON-LD document in the notebook.
#
# -----
# #### Further Reading
#
# - [JSON-LD Playground](https://json-ld.org/playground)
#

# In[29]:


def createObjDesc(objProp,objTypes,object_uri):
    """Assemble the full Linked Art description of an artwork.

    Starts from a ``HumanMadeObject``, or from the first ``objTypes`` entry
    whose key occurs in ``objProp["classification"]``, then attaches
    identifiers, statements, home page, production, collection, keeper and
    owner using the builder functions of this file.

    Returns the populated object. Raises ``KeyError`` when ``objProp`` lacks
    ``classification`` or ``title``.
    """
    objLA = HumanMadeObject() # linked art object

    # NOTE(review): the value stored in objTypes is used as-is; if these are
    # shared instances, every call mutates the same object - confirm.
    for otype in objTypes:
        if otype in objProp["classification"]:
            objLA = objTypes[otype]
            break

    objLA.id = object_uri
    objLA._label = objProp["title"]

    # IDENTIFIED_BY
    accession = objAccession(objProp,object_uri)
    localnumber = objLocalnumber(objProp,object_uri)
    #primaryname = objPrimaryname(objProp,object_uri)
    # listIds = (accession,localnumber,primaryname)
    identifiers = [ident for ident in (accession, localnumber) if ident is not None]
    if identifiers:
        objLA.identified_by = []
        for ident in identifiers:
            objLA.identified_by.append(ident)

    # REFERRED_TO_BY
    objLA.referred_to_by = None
    prov = objProvenance(objProp,object_uri)
    credit = objCredit(objProp,object_uri)
    statements = [stmt for stmt in (prov, credit) if stmt is not None]
    if statements:
        objLA.referred_to_by = []
        for stmt in statements:
            # provenance first, then credit line
            objLA.referred_to_by.append(stmt)

    # SUBJECT_OF
    objLA.subject_of = None
    homepage = objHomepage(objProp,object_uri)
    if homepage is not None:
        objLA.subject_of = homepage # home page

    # PRODUCED_BY
    objLA.produced_by = None
    if "creator" in objProp:
        prod = objProduction(objProp,object_uri)
        if prod is not None:
            objLA.produced_by = prod # production

    # MEMBER_OF
    objLA.member_of = None
    if "collection" in objProp:
        coll = objCollection(objProp,object_uri)
        if coll is not None:
            objLA.member_of = coll # collection

    # CURRENT_KEEPER
    # The original assigned the custody group to current_owner, which mixed
    # keeper and owner in a single property.
    # NOTE(review): later Linked Art releases call this property
    # current_custodian - confirm the name the installed library expects.
    custody = objCustody(objProp,object_uri)
    if custody is not None:
        objLA.current_keeper = custody

    # CURRENT_OWNER
    objLA.current_owner = None
    if "current_owner" in objProp and objProp["current_owner"] != "":
        current_owner = objCurrentowner(objProp,object_uri)
        if current_owner is not None:
            objLA.current_owner = current_owner

    return objLA


objProp = createObjProp(obj,mapp)
objLA = createObjDesc(objProp,objTypes,object_uri)

data = factory.toString(objLA, compact=False)

# The context manager closes the file even if the write fails.
with open("./data/ima/output/json/object_linkedart.json", "w", encoding="utf-8") as f:
    f.write(data)

print(data)


# ## View the Linked Art JSON-LD
#
# The final Linked Art JSON-LD representation of the object is visualised below.

# In[30]:


from IPython.display import display, Javascript

# Append a call to visjsonld() that loads the JSON file written above.
with open('./src/js/visld.js', 'r') as _jscript:
    code = _jscript.read() + "var file = './data/ima/output/json/object_linkedart.json';var selector = '#vis3';visjsonld(file, selector); "
display(Javascript(code))


#
# In[ ]: