#!/usr/bin/env python # coding: utf-8 # In[1]: try: import IPython except: get_ipython().run_line_magic('pip', 'install IPython') import IPython from IPython.display import display, IFrame, HTML, Javascript from IPython.core.display import display, HTML HTML("""""") # # Collections Data to Linked Art # # Ashmolean Museum, Oxford, UK # # This notebook provides a guided step through the process of transforming Cultural Heritage collections data to Linked Art JSON-LD. # # The Ashmolean Museum in Oxford, UK holds many artworks by the artist John Ruskin. At present, collection data for these artworks can only be retrieved by screen-scraping their web pages; for this notebook that was done with the Python library `BeautifulSoup`. From the documentation: # #
# > Beautiful Soup is a Python library for pulling data out of HTML and XML files. It works with your favorite parser to provide idiomatic ways of navigating, searching, and modifying the parse tree.
#
# The HTML was derived from a collections search via https://collections.ashmolean.org/collection/search with the HTML page saved locally to a [data file](./data/ashmolean/ash.html).
#
# #### Further Reading
#
# - [BeautifulSoup Python library](https://www.crummy.com/software/BeautifulSoup/bs4/doc/)
# - [Ashmolean Museum Collection search](https://collections.ashmolean.org/collection/search)
# - [Ruskin search as HTML file](./data/ashmolean/ash.html)
# - [Ashmolean Museum](https://ashmolean.web.ox.ac.uk/)

# In[5]:


# Cell purpose: parse the locally saved Ashmolean search-results page, build one
# property dictionary per artwork, convert it to Linked Art JSON-LD, write it to
# `outputdir`, and collect the dropdown options (`selectOptions`) used by the
# visualisation cell further down.

import os
import re

# Only a missing package should trigger the install fallback; a bare `except:`
# would also hide unrelated errors (and even KeyboardInterrupt).
try:
    import pandas as pd
except ImportError:
    get_ipython().system('pip install pandas')
    import pandas as pd

import requests

try:
    from bs4 import BeautifulSoup
except ImportError:
    get_ipython().run_line_magic('pip', 'install bs4')
    from bs4 import BeautifulSoup

# cromulent
from cromulent.model import factory

# custom linked art coding
from lib import linkedart as la

outputdir = "./data/ruskin/output/json/"
# Make sure the target directory exists before the loop starts writing into it.
os.makedirs(outputdir, exist_ok=True)

# (label, value) pairs for the dropdown; the first entry is a placeholder whose
# value is the empty string.
selectOptions = []
selectOptions.append(("Please select an artwork", ""))

baseURI = "https://collections.ashmolean.org/collection/"
ash_file = './data/ashmolean/ash.html'

# Raw-string patterns, compiled once outside the loop. The original non-raw
# '(\d{4})' relied on an invalid escape sequence.
_YEAR_RE = re.compile(r"(\d{4})")
# The label is followed by whitespace that includes a line-break character in
# the scraped text; `\s*` matches whatever separator is present.
_ACCESSION_LABEL_RE = re.compile(r"Accession no\.\s*")

# open HTML file (context manager so the handle is closed again)
# NOTE(review): assumes the saved page is UTF-8 encoded - confirm.
with open(ash_file, 'r', encoding="utf-8") as html_file:
    html_text = html_file.read()

# create soup
soup = BeautifulSoup(html_text, 'html.parser')

# iterate through artwork descriptions in HTML
for artwork in soup.find_all('div', attrs={"class": "list-inner"}):
    heading = artwork.find('h3')

    # title
    artwork_title = heading.string

    # id: the last six characters of the link target
    # NOTE(review): presumably the object number is always six characters long;
    # verify against the saved HTML.
    object_id = heading.find('a').get('href')[-6:]

    # creator (every artwork in this search result is by Ruskin)
    creator = [{
        "id": "https://collections.ashmolean.org/collection/search/trigger/person_id/value/7800",
        "name": "John Ruskin",
        "role": "Artist",
    }]

    # accession number, with the leading label removed
    accession_number = _ACCESSION_LABEL_RE.sub("", artwork.find('span').string)

    # image url
    image_url = artwork.find("img").get("src")

    # date created, with the leading label removed
    date_created = artwork.find("p").string.replace("Date: ", "")

    objProp = {
        "classification": "",
        "homepage": "https://collections.ashmolean.org/collection/browse-9148/object/" + object_id,
        "id": object_id,
        "title": artwork_title,
        "creator": creator,
        "accession_number": accession_number,
        "image_url": image_url,
        "date_created": date_created,
        "current_owner": {
            "name": "Ashmolean Museum",
            "location": "Oxford, England, UK",
            "type": "http://vocab.getty.edu/aat/300312281",
            "type_label": "",
        },
    }

    # look for year in value: one year means earliest == latest, two years give
    # a range, anything else falls back to Ruskin's birth/death years.
    # NOTE(review): matched years are strings while the fallback values are
    # ints (unchanged from the original) - confirm what la.createObjDesc expects.
    years = _YEAR_RE.findall(date_created)
    if len(years) == 1:
        earliest = latest = years[0]
    elif len(years) == 2:
        earliest, latest = years
    else:
        earliest, latest = 1819, 1900
    objProp["date_created_earliest"] = earliest
    objProp["date_created_latest"] = latest

    object_uri = baseURI + object_id
    json_name = objProp["id"] + ".json"

    # create drop down options for jsonld vis
    option_label = objProp["title"] + " (" + json_name + ")"
    selectOptions.append((option_label, json_name))

    objLA = la.createObjDesc(objProp, la.objTypes, object_uri)

    # write to file; UTF-8 is the standard encoding for JSON documents
    with open(outputdir + json_name, "wt", encoding="utf-8") as text_file:
        text_file.write(factory.toString(objLA, compact=False))

    # NOTE(review): the flattened source does not show whether these two calls
    # sat inside or after the loop; kept inside so every artwork is shown and
    # nothing fails when the page yields no artworks.
    display(objProp["title"])
    display(pd.DataFrame(objProp, index=[0]))


# ### Visualisation of the Linked Art JSON-LD
#
# The Linked Art JSON-LD representation can be viewed in a data visualisation below. Select an artwork from the dropdown menu.
# In[7]:


# json is part of the standard library, so no install fallback is needed.
import json

# Only a missing package should trigger the install fallback.
try:
    import ipywidgets
except ImportError:
    get_ipython().run_line_magic('pip', 'install ipywidgets')
    import ipywidgets

from ipywidgets import Layout, FileUpload
from IPython.display import display, IFrame, HTML, Image, Javascript


def dropdown_eventhandler_tate(change):
    """Show the JSON-LD visualisation and image for the selected artwork.

    Registered as the observer for the dropdown's ``value``.

    Args:
        change: change notification passed by ``observe``; ``change.new`` is
            the selected option value - a JSON file name inside the output
            directory, or the empty string for the placeholder entry.
    """
    outputdir = "./data/ruskin/output/json/"

    selected = change.new
    if not selected:
        # Placeholder entry: there is no file to load, and opening the bare
        # directory path would raise.
        return

    json_path = outputdir + selected

    with open('./src/js/visld.js', 'r') as _jscript:
        vis_source = _jscript.read()

    # json.dumps produces a correctly quoted and escaped JavaScript string
    # literal, so quotes or backslashes in the value cannot break the script.
    code = (
        vis_source
        + "var file = " + json.dumps(json_path)
        + ";var selector = '#visash';visjsonld(file, selector); "
    )
    display(Javascript(code))

    with open(json_path, encoding="utf-8") as json_file:
        artwork = json.load(json_file)

    # Use the first representation's id as the image source; clear the image
    # when the record has no (or an empty) "representation" entry.
    image = ""
    if "representation" in artwork and artwork["representation"]:
        image = artwork["representation"][0]["id"]
    display(Javascript(
        "document.getElementById('artworkash').src = " + json.dumps(image) + ";"
    ))


selectObject = ipywidgets.Dropdown(options=selectOptions)
selectObject.observe(dropdown_eventhandler_tate, names='value')
display(selectObject)


#
#
#