Created in October-December 2022 for the National Library of Scotland's Data Foundry by Gustavo Candela, National Librarian’s Research Fellowship in Digital Scholarship 2022-23
This dataset represents the descriptive metadata from the Moving Image Archive catalogue, which is Scotland’s national collection of moving images.
Import the libraries required to read the dataset (pandas) and to transform each record into RDF triples (rdflib).
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import FOAF, RDF, DCTERMS, VOID, DC, SKOS
import pandas as pd
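Note: The variable `domain` can be updated to the base URL of the organisation (e.g., https://data.nls.uk/). Keep the trailing slash, because resource paths such as `organisation/nls` are appended directly to it.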
# Base URL under which every locally minted resource URI is created.
# Paths are appended by plain string concatenation, so it ends with "/".
domain = 'https://example.org/'

# Vocabularies that rdflib does not ship as ready-made namespace objects.
schema = Namespace("https://schema.org/")
edm = Namespace("http://www.europeana.eu/schemas/edm/")

# The graph that accumulates every triple produced by this notebook.
g = Graph()

# Register the prefixes used when the graph is serialised.
for _prefix, _namespace in (
    ("foaf", FOAF),
    ("rdf", RDF),
    ("dcterms", DCTERMS),
    ("dc", DC),
    ("void", VOID),
    ("skos", SKOS),
    ("schema", schema),
    ("edm", edm),
):
    g.bind(_prefix, _namespace)

# Describe the National Library of Scotland as the organisation node that
# the records are later linked to.
nls = URIRef(domain + "organisation/nls")
nls_homepage = URIRef("https://www.nls.uk/")
nls_logo = URIRef("https://www.nls.uk/images/nls-logo.png")
nls_label = Literal("National Library of Scotland")

g.add((nls, RDF.type, schema.Organization))
g.add((nls, schema.url, nls_homepage))
g.add((nls, schema.logo, nls_logo))
g.add((nls, schema.name, nls_label))
g.add((nls, DC.title, nls_label))
<Graph identifier=Nb53b3247e69445b7a988152ad5e7c0fb (<class 'rdflib.graph.Graph'>)>
# Column labels applied to the CSV, in file order.
column_names = (
    'title', 'author', 'authorOrganisation',
    'author720', 'place_publication',
    'date', 'extent', 'credits',
    'subjects', 'summary', 'details', 'link', 'geographicNames',
    'contentType', 'mediaType', 'carrierType', 'generalNote', 'thumbnail',
)

# NOTE: explicit `names` are passed without a `header` argument, so every
# line of the file is loaded as a data row.
df = pd.read_csv('../data/output/movingImageArchive.csv', names=column_names)
print(df)

# Replace the index with a fresh 0..n-1 range; the previous index is kept
# as an extra "index" column.
df = df.reset_index()
title author \ 0 title author 1 GLASGOW TRAMS AND BOTANIC GARDENS RUSSELL, Stanley Livingstone 2 LAST DAY OF THE TRAMS, GLASGOW NaN 3 INTO THE MISTS NaN 4 PASSING OF THE TRAMCAR, the NaN ... ... ... 20604 N.P. NASSAU BAY Ship No. 689 NaN 20605 DREDGING IN THE RIVER TEES NaN 20606 AUTOMATION ON A SUCTION DREDGE NaN 20607 QUEEN ELIZABETH Ship No. 552 NaN 20608 RUAHINE NaN authorOrganisation author720 place_publication \ 0 authorOrganisation author720 place_publication 1 NaN NaN [Place of production not identified] 2 NaN NaN [Place of production not identified] 3 NaN NaN [Place of production not identified] 4 NaN NaN [Place of production not identified] ... ... ... ... 20604 NaN NaN [Place of production not identified] 20605 NaN NaN [Place of production not identified] 20606 NaN NaN [Place of production not identified] 20607 NaN NaN [Place of production not identified] 20608 NaN NaN [Place of production not identified] date extent \ 0 date extent 1 1950 (2:00 mins) : 2 1962 (28:00 mins) : 3 1956 (10:04 mins) : 4 1962 (63:36 mins) : ... ... ... 20604 1964 (2min38sec) : 20605 1965 [] 20606 1966 (16:38 mins) : 20607 1940 (5min24sec) : 20608 1951 (12:26 mins) : credits \ 0 credits 1 [filmed by Stanley L. Russell, Thames and Clyde]. 2 [filmed by SAAC]. 3 [filmed by W.S. Dobson]. 4 NaN ... ... 20604 NaN 20605 NaN 20606 Producer, The Stephen Group Film Unit. -- Cred... 20607 NaN 20608 NaN subjects \ 0 subjects 1 Bus Stations and Depots -- Buses and Coaches, ... 2 Transport 3 Ceremonies -- Emotions, Attitudes and Behaviou... 4 Ceremonies -- Transport ... ... 20604 NaN 20605 NaN 20606 NaN 20607 Employment, Industry and Industrial Relations ... 20608 Carriages -- Ceremonies -- Ships and Shipping summary \ 0 summary 1 The Botanic Gardens, Glasgow with shots of the... 2 Footage of the last trams to run in Glasgow, a... 3 The story of the last Edinburgh tram. Shots o... 4 Footage of the last tram to run in Glasgow. Th... ... ... 20604 Launching of vessel. 
20605 Simons Lobnitz film made by Stephen Group Film... 20606 The film depicts the workings and operation of... 20607 Built and engineered by John Brown & Co. Ltd. ... 20608 Footage of "Ruahine" ship being launched and t... details \ 0 details 1 without sound, colour 2 without sound, colour 3 without sound, colour 4 without sound, colour ... ... 20604 without sound, colour 20605 without sound, colour 20606 with sound, colour 20607 without sound, black and white 20608 without sound, black and white & colour link geographicNames \ 0 link geographicNames 1 http://movingimage.nls.uk/film/0001 Glasgow 2 http://movingimage.nls.uk/film/0002 Glasgow 3 http://movingimage.nls.uk/film/0004 Edinburgh 4 http://movingimage.nls.uk/film/0005 Glasgow ... ... ... 20604 http://movingimage.nls.uk/film/UCS0210 NaN 20605 http://movingimage.nls.uk/film/UCS0211 NaN 20606 http://movingimage.nls.uk/film/UCS0212 NaN 20607 http://movingimage.nls.uk/film/UCS0213 NaN 20608 http://movingimage.nls.uk/film/UCS0214 Dunbartonshire contentType mediaType \ 0 contentType mediaType 1 two-dimensional moving image -- rdacontent unspecified -- rdamedia 2 two-dimensional moving image -- rdacontent unspecified -- rdamedia 3 two-dimensional moving image -- rdacontent unspecified -- rdamedia 4 two-dimensional moving image -- rdacontent unspecified -- rdamedia ... ... ... 
20604 two-dimensional moving image -- rdacontent unspecified -- rdamedia 20605 two-dimensional moving image -- rdacontent unspecified -- rdamedia 20606 two-dimensional moving image -- rdacontent unspecified -- rdamedia 20607 two-dimensional moving image -- rdacontent unspecified -- rdamedia 20608 two-dimensional moving image -- rdacontent unspecified -- rdamedia carrierType generalNote \ 0 carrierType generalNote 1 unspecified -- rdacarrier ['non-fiction; documentary '] 2 unspecified -- rdacarrier ['non-fiction; documentary amateur '] 3 unspecified -- rdacarrier ['non-fiction; amateur '] 4 unspecified -- rdacarrier ['non-fiction; sponsored '] ... ... ... 20604 unspecified -- rdacarrier ['non-fiction'] 20605 unspecified -- rdacarrier ['non-fiction'] 20606 unspecified -- rdacarrier ['non-fiction'] 20607 unspecified -- rdacarrier ['non-fiction; technical '] 20608 unspecified -- rdacarrier ['non-fiction; technical '] thumbnail 0 thumbnail 1 NaN 2 http://deriv.nls.uk/dcn19/1358/3808/135838082.... 3 http://deriv.nls.uk/dcn19/7524/75247309.19.jpg 4 NaN ... ... 20604 NaN 20605 NaN 20606 NaN 20607 NaN 20608 NaN [20609 rows x 18 columns]
# ---------------------------------------------------------------------------
# Turn every catalogue record into RDF triples describing a
# schema:VideoObject, its places, and its authors (people / organisations).
# ---------------------------------------------------------------------------

# Characters removed from a name when it is turned into a URI path segment.
_SLUG_DROP = str.maketrans("", "", "’‘.(), ")

# Columns copied verbatim (stripped) as literals, with the predicate(s) used.
_LITERAL_FIELDS = (
    ("title", (DC.title, schema.name)),
    ("extent", (schema.duration,)),
    ("credits", (schema.creditText,)),
    ("summary", (schema.abstract,)),
    ("details", (schema.videoQuality,)),
    ("date", (schema.datePublished, DC.date)),
)


def _split_values(cell):
    """Split a "--"-delimited cell into stripped, non-empty values."""
    return [part.strip() for part in cell.split("--") if part.strip()]


def _agent_slug(name):
    """Return the URI path segment for an author / organisation name.

    Only the text before the first "/" is used. It is lower-cased, trimmed,
    and stripped of quotes, dots, brackets, commas and spaces.
    """
    # partition() keeps the whole name when there is no "/", and never drops
    # the character before the "/" (the previous index("/") - 1 slice did
    # when no space preceded the slash).
    head = name.partition("/")[0]
    return head.lower().strip().translate(_SLUG_DROP)


def _place_slug(name):
    """Return the URI path segment for a geographic name."""
    return name.replace(",", "").replace(" ", "").lower().strip()


def _add_agents(graph, video, cell, path, agent_types):
    """Link `video` to every agent listed in `cell`.

    `path` is the URI path prefix ('author/' or 'organization/') and
    `agent_types` are the rdf:type values asserted for each agent.
    """
    for name in _split_values(cell):
        slug = _agent_slug(name)
        if not slug:
            # Nothing usable to build a URI from (e.g. the value was "/").
            continue
        agent = URIRef(domain + path + slug)
        label = Literal(name)
        graph.add((video, schema.author, agent))
        for agent_type in agent_types:
            graph.add((agent, RDF.type, agent_type))
        graph.add((agent, SKOS.prefLabel, label))
        graph.add((agent, schema.name, label))
        graph.add((agent, FOAF.name, label))


for index, row in df.iterrows():
    # Row 0 is skipped: it holds the CSV's own header line, loaded as data.
    if index == 0:
        continue

    # The record URI is derived from the link, so a record without one
    # cannot be identified; skip it instead of failing on a NaN value.
    if pd.isnull(row["link"]):
        continue
    link = row["link"]

    video = URIRef(
        domain
        + link.replace("http://movingimage.nls.uk/", "").replace(" ", "").strip()
    )
    g.add((video, RDF.type, URIRef("https://schema.org/VideoObject")))
    g.add((video, schema.sourceOrganization, nls))

    # Plain text fields.
    for column, predicates in _LITERAL_FIELDS:
        if pd.notnull(row[column]):
            value = Literal(row[column].strip())
            for predicate in predicates:
                g.add((video, predicate, value))

    if pd.notnull(row["thumbnail"]):
        g.add((video, schema.thumbnail, URIRef(row["thumbnail"].strip())))

    # The catalogue link, minus a " - du" suffix, is the record identifier.
    # NOTE(review): the record URI above keeps "-du" (only spaces are
    # removed there) -- confirm that difference is intended.
    identifier = URIRef(link.replace(" - du", "").strip())
    g.add((video, schema.identifier, identifier))
    g.add((video, DC.identifier, identifier))

    if pd.notnull(row["subjects"]):
        for subject in _split_values(row["subjects"]):
            g.add((video, DC.subject, Literal(subject)))

    if pd.notnull(row["geographicNames"]):
        for place_name in _split_values(row["geographicNames"]):
            slug = _place_slug(place_name)
            if not slug:
                continue
            place = URIRef(domain + 'location/' + slug)
            # Labels keep the original spelling; only the URI is compacted.
            place_label = Literal(place_name)
            g.add((video, DCTERMS.spatial, place))
            g.add((place, RDF.type, schema.Place))
            g.add((place, RDF.type, edm.Place))
            g.add((place, SKOS.prefLabel, place_label))
            g.add((place, schema.name, place_label))

    # Personal authors come from two columns and share one URI space.
    for column in ("author", "author720"):
        if pd.notnull(row[column]):
            _add_agents(g, video, row[column], 'author/',
                        (schema.Person, FOAF.Person))

    if pd.notnull(row["authorOrganisation"]):
        _add_agents(g, video, row["authorOrganisation"], 'organization/',
                    (schema.Organization, FOAF.Organization))
# Write the graph to disk. The format is stated explicitly so the ".ttl"
# file contains Turtle whatever the installed rdflib's default format is.
g.serialize(destination="../rdf/dataset.ttl", format="turtle")
<Graph identifier=Nb53b3247e69445b7a988152ad5e7c0fb (<class 'rdflib.graph.Graph'>)>