Here's another way you can get a random work from Trove's `book`, `article`, `picture`, `map`, `music`, or `collection` zones. This approach is particularly useful if you want to get a random result from a search, or want to apply a variety of facets. It's not as quick as pinging random work ids at Trove, but it's more flexible.
Basically this method gets all the available facets for a particular search. If the search has more than 100 results, it chooses one of the facets at random and applies it. It keeps doing this until the search returns 100 results or fewer (or there are no more facets left to apply). Then it chooses a work at random from the results. If you don't supply a query, it uses a random stop word to mix things up a bit.
The problem with this approach is that facets can't always be extracted from records, and there's no way of finding records without a particular facet. For example, you can use the `year` facet to limit results to a particular year, but what about records that don't have a `year` value? Once you start using that facet, they're invisible. I'm worried that this will mean that certain parts of Trove will never be surfaced. It would of course be much better if Trove just supported random sorting so I didn't have to do all these stupid workarounds.
Collection searches (ie using NUC identifiers) are particularly tricky, because items from a single collection can share very similar facet values. To try and limit the results in this sort of situation, I've provided a couple of extra parameters:
- `add_word` – adds a random stopword to the query
- `add_number` – adds a random two digit number to the query (useful if the records use numeric identifiers)

These can help increase the degree of randomness, but again I suspect some parts of collections will never be reached.
import json
import os
import random
import requests
from IPython.display import HTML, display
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
# Share a single session so that every request uses the same retry policy.
s = requests.Session()
# Retry up to five times, backing off between attempts, on gateway-type errors.
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
s.mount("https://", HTTPAdapter(max_retries=retries))
s.mount("http://", HTTPAdapter(max_retries=retries))

# Stopwords are used as random query terms.
# The encoding is given explicitly so the file is read the same way on every platform.
with open("stopwords.json", "r", encoding="utf-8") as json_file:
    STOPWORDS = json.load(json_file)

# Use HTTPS so the API key (sent as a query parameter) isn't transmitted in clear text.
API_URL = "https://api.trove.nla.gov.au/v2/result"
%%capture
# Load environment variables (such as TROVE_API_KEY) from a .env file, if one exists.
# The %%capture cell magic suppresses any messages printed while loading.
%load_ext dotenv
%dotenv
# Prefer a Trove API key supplied through the TROVE_API_KEY environment variable.
# If it isn't set (or is empty), fall back to the placeholder -- insert your own key here.
API_KEY = os.getenv("TROVE_API_KEY") or "YOUR API KEY"
def get_facet_terms(terms):
    """
    Flatten a (possibly nested) list of facet terms into a list of 'search' values.

    Each term's own 'search' value comes first, followed directly by the
    values of any child terms nested under its 'term' key.
    """

    def walk(nodes):
        # Depth-first, parent before children, in the order supplied
        for node in nodes:
            yield node["search"]
            if "term" in node:
                yield from walk(node["term"])

    return list(walk(terms))
def get_facets(data):
    """
    List the facets returned with a search, together with each facet's terms.

    Only the first zone in the response is examined. Facets whose name starts
    with 'adv' are left out, as is the 'decade' facet.
    Returns a list of dicts in the form {"facet": name, "terms": [...]}.
    """
    available = data["response"]["zone"][0]["facets"]["facet"]
    return [
        {"facet": item["name"], "terms": get_facet_terms(item["term"])}
        for item in available
        if not (item["name"][:3] == "adv" or item["name"] == "decade")
    ]
def set_query(params, query=None, add_word=False, add_number=False):
    """
    Set the 'q' value in params and return the (modified) params dict.

    Without a query, 'q' is a random stopword in double quotes. With a query,
    a quoted random stopword is appended if add_word is set; otherwise a quoted,
    zero-padded random number (01-99) is appended if add_number is set;
    otherwise the query is used unchanged.
    """
    # Both random values are drawn up front, whether or not they end up being used
    word = random.choice(STOPWORDS)
    number = random.randrange(1, 100)
    if not query:
        q = f'"{word}"'
    elif add_word:
        q = f'{query} "{word}"'
    elif add_number:
        q = f'{query} "{number:02}"'
    else:
        q = query
    params["q"] = q
    return params
def _search_zone(params, applied_facets):
    """
    Run a search and return a tuple of (total results, facets not yet applied).
    """
    data = s.get(API_URL, params=params).json()
    total = int(data["response"]["zone"][0]["records"]["total"])
    # Drop facets that have already been applied so they're not chosen twice
    facets = [f for f in get_facets(data) if f.get("facet") not in applied_facets]
    return total, facets


def get_random_work_from_zone(zone, query, **kwargs):
    """
    Get a random work from a single Trove zone.

    Starting with the supplied query and facets, a randomly chosen value from a
    randomly chosen facet is applied, one facet at a time, until the search
    returns 100 results or fewer (or there are no unused facets left). A work is
    then chosen at random from the records returned by a request for up to 100
    results.

    Parameters:
        zone: name of the zone to search, eg 'book' or 'picture'
        query: value sent as the 'q' parameter
        **kwargs: facets to apply from the start; `name=value` is sent as `l-name=value`

    Returns:
        A work record taken from the API response, or None if the final
        search has no results.
    """
    params = {
        "zone": zone,
        "encoding": "json",
        # Keeping this at 0 until we've filtered the results speeds things up
        "n": "0",
        "key": API_KEY,
        "facet": "all",
        "include": "links",
        "q": query,
    }
    applied_facets = []
    for key, value in kwargs.items():
        params[f"l-{key}"] = value
        applied_facets.append(key)
    total, facets = _search_zone(params, applied_facets)
    # Keep going until we either have 100 results or fewer, or we run out of facets
    while total > 100 and facets:
        new_facet = random.choice(facets)
        applied_facets.append(new_facet["facet"])
        params[f'l-{new_facet["facet"]}'] = random.choice(new_facet["terms"])
        total, facets = _search_zone(params, applied_facets)
    if total <= 0:
        # Nothing to choose from -- return None explicitly rather than
        # failing on an unassigned variable.
        return None
    # Now ask for the records themselves; the facets are no longer needed
    params["n"] = "100"
    params.pop("facet", None)
    data = s.get(API_URL, params=params).json()
    return random.choice(data["response"]["zone"][0]["records"]["work"])
def get_zones(data):
    """
    Return the names of the zones in a response that contain at least one result.
    """
    return [
        entry["name"]
        for entry in data["response"]["zone"]
        if int(entry["records"]["total"]) > 0
    ]
def get_random_work(zone=None, query=None, add_word=False, add_number=False, **kwargs):
    """
    Get a random work from Trove.

    A search with no records requested is run first to find which zones have
    results. One of those zones is chosen at random and a random work is then
    selected from it.

    Parameters:
        zone: value for the 'zone' parameter; defaults to
            'book,article,picture,map,music,collection'
        query: search query; if not supplied, a random stopword is used
        add_word: add a random stopword to the supplied query
        add_number: add a random two digit number to the supplied query
            (only used if add_word is not set)
        **kwargs: facets to apply; `name=value` is sent as `l-name=value`

    Returns:
        A work record, or None if none of the searched zones has results.
    """
    params = {
        "encoding": "json",
        "n": "0",
        "key": API_KEY,
        "zone": zone if zone else "book,article,picture,map,music,collection",
    }
    # Add any supplied facets
    for key, value in kwargs.items():
        params[f"l-{key}"] = value
    # Retrying only helps when the query changes between attempts; a fixed
    # query would just repeat an identical request.
    query_is_random = not query or add_word or add_number
    max_tries = 11 if query_is_random else 1
    zones = []
    # Make sure that at least some zones have results
    for _ in range(max_tries):
        params = set_query(params, query, add_word, add_number)
        data = s.get(API_URL, params=params).json()
        zones = get_zones(data)
        if zones:
            break
    if not zones:
        # No zone had results -- return None explicitly rather than
        # failing on an unassigned variable.
        return None
    return get_random_work_from_zone(
        zone=random.choice(zones), query=params["q"], **kwargs
    )
This is a collection where facets aren't terribly useful in slicing up the results because the range of values is very limited. However, items in this collection do have numeric identifiers, and so including the `add_number` parameter seems to help divide it up into chunks of less than 100.
# Search a single collection by its NUC identifier, adding a random two digit
# number to the query to help break up the results.
get_random_work(query='(nuc:"VMUS:CHIA")', add_number=True)
{'id': '197894996', 'url': '/work/197894996', 'troveUrl': 'https://trove.nla.gov.au/work/197894996', 'title': 'Chinatown, Darwin', 'contributor': ['Jack Buscall'], 'issued': '1939-1941', 'type': ['Photograph'], 'rights': 'Reproduction rights owned by the Northern Territory Library.', 'holdingsCount': 1, 'versionCount': 1, 'hasCorrections': 'N', 'relevance': {'score': '31.259758', 'value': 'very relevant'}, 'snippet': ". <b>32</b>. Also titled: 'Darwin's Chinatown before World War 2' Copyprint Location: Cavenagh Street, Bennett", 'identifier': [{'type': 'url', 'linktype': 'fulltext', 'value': 'http://www.chia.chinesemuseum.com.au/objects/D002314.htm'}, {'type': 'url', 'linktype': 'thumbnail', 'value': 'http://www.territorystories.nt.gov.au/bitstream/handle/10070/1855/04375.JPG.jpg'}]}
Using the new `imageInd` parameter in the query to find records with thumbnails.
# The search must be passed as `query`; any other keyword argument
# (such as `format` here) is applied as a facet.
get_random_work(zone="picture", query="imageInd:thumbnail", format="Photograph")
{'id': '238334520', 'url': '/work/238334520', 'troveUrl': 'https://trove.nla.gov.au/work/238334520', 'title': 'Photograph - Post Card', 'contributor': ["Valentine's"], 'issued': 1908, 'type': ['Photograph'], 'rights': ['You may download, display, print or reproduce this image in an unaltered form and with acknowledgement to Phillip Island and District Historical Society Inc. for personal, educational and private research use. If you wish to use it for any other purposes you must obtain permission from Phillip Island and District Historical Society Inc..', 'Attribution Non Commercial ShareAlike 3.0 Unported Creative Commons', 'http://creativecommons.org/licenses/by-nc-sa/3.0/'], 'holdingsCount': 1, 'versionCount': 1, 'hasCorrections': 'N', 'relevance': {'score': '0.0059571834', 'value': 'vaguely relevant'}, 'snippet': ' Phillip Island"\nLetter written by Marie of Everton Cowes to Millie addressed to Mrs <b>H</b>. Blamey, "Roslyn', 'identifier': [{'type': 'url', 'linktype': 'fulltext', 'linktext': 'Explore further with Victorian Collections', 'value': 'https://victoriancollections.net.au/items/56c400582162f10e68c9dea7'}, {'type': 'url', 'linktype': 'thumbnail', 'value': 'https://victoriancollections.net.au/media/collectors/4f729f5b97f83e0308601629/items/56c400582162f10e68c9dea7/item-media/5ee6cc6521ea671d3ca61101/item-130x0.jpg'}]}
You can include as many additional facets as you want. Here's an example using `publictag`.
# No query is supplied, so a random stopword is used as the query, with the
# `publictag` facet set to 'Japan'.
get_random_work(publictag="Japan")
{'id': '6411867', 'url': '/work/6411867', 'troveUrl': 'https://trove.nla.gov.au/work/6411867', 'title': 'The enigma of Japanese power : people and politics in a stateless nation / Karel van Wolferen', 'contributor': ['Wolferen, Karel Van'], 'issued': '1988-1993', 'type': ['Book', 'Book/Illustrated', 'Audio book'], 'isPartOf': {'type': 'series', 'value': 'Tut books'}, 'holdingsCount': 47, 'versionCount': 12, 'hasCorrections': 'N', 'relevance': {'score': '0.0022870363', 'value': 'vaguely relevant'}, 'identifier': [{'type': 'url', 'linktype': 'restricted', 'linktext': 'source', 'value': 'http://www.loc.gov/catdir/description/random048/89040552.html'}, {'type': 'url', 'linktype': 'restricted', 'linktext': 'Direct link to full text: http://openlibrary.org/details/enigmaofjapanese00wolf', 'value': 'http://openlibrary.org/books/OL2062341M'}, {'type': 'url', 'linktype': 'restricted', 'linktext': 'HathiTrust Digital Library, Limited view (search only)', 'value': 'http://catalog.hathitrust.org/api/volumes/oclc/19130854.html'}, {'type': 'url', 'linktype': 'restricted', 'linktext': 'Free eBook from the Internet Archive', 'value': 'https://archive.org/details/enigmaofjapanese00wolf'}, {'type': 'url', 'linktype': 'restricted', 'linktext': 'Direct link to full text: http://openlibrary.org/details/enigmaofjapanese00kare', 'value': 'http://openlibrary.org/books/OL16828742M'}, {'type': 'url', 'linktype': 'restricted', 'linktext': 'Direct link to full text: http://openlibrary.org/details/enigmaofjapanese00wolf_0', 'value': 'http://openlibrary.org/books/OL2217099M'}, {'type': 'url', 'linktype': 'notonline', 'linktext': 'Publisher description', 'value': 'http://www.loc.gov/catdir/description/random048/89040552.html'}, {'type': 'url', 'linktype': 'notonline', 'linktext': 'Additional information and access via Open Library', 'value': 'https://openlibrary.org/books/OL2062341M'}]}
Just to cheer myself up a bit...
# Pass the search as `query` -- a `q` keyword argument would be treated as a facet.
record = get_random_work(zone="picture", query="imageInd:thumbnail", format="Photograph")
# Take the first thumbnail link. Fall back to None if no record was returned
# or it has no thumbnail, so we never try to display an undefined url.
url = next(
    (
        link["value"]
        for link in (record or {}).get("identifier", [])
        if link.get("linktype") == "thumbnail"
    ),
    None,
)
if url:
    display(HTML(f'<img src="{url}">'))
else:
    print("No thumbnail found for this record.")
%%timeit
# Time a complete lookup with no zone, query, or facets supplied
get_random_work()
3.05 s ± 1.13 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
Created by Tim Sherratt for the GLAM Workbench.