The Museums Victoria collection API accepts four recordtype
values: 'article', 'item', 'species', and 'specimen'. In this notebook we'll build a simple harvester to download all the 'species' records.
See the Museums Victoria collection API documentation for more information.
import requests
from tqdm.auto import tqdm
import pandas as pd
from IPython.display import display, FileLink
# Base search url -- the endpoint that get_totals() and harvest_species() send their requests to
SEARCH_URL = 'https://collections.museumsvictoria.com.au/api/search'
def get_totals(params):
    '''
    Get the total results and pages from a search.

    Parameters:
        params: dict of query-string parameters to send to SEARCH_URL.

    Returns:
        A (total_results, total_pages) tuple of ints.

    Raises:
        requests.HTTPError: if the API responds with an error status.
        KeyError: if the response lacks the 'Total-Results' or 'Total-Pages' header.
    '''
    response = requests.get(
        SEARCH_URL,
        params=params,
        headers={'User-Agent': 'Mozilla/5.0'},
        # requests has no default timeout; without one a stalled connection hangs forever
        timeout=60,
    )
    # Surface HTTP errors directly instead of as a confusing KeyError on the headers below
    response.raise_for_status()
    # The total results and pages values are in the API response's headers!
    total_results = int(response.headers['Total-Results'])
    total_pages = int(response.headers['Total-Pages'])
    return (total_results, total_pages)
def harvest_species():
    '''
    Download all the species records, saving the record id, taxon name, and common name.

    Records without a taxonomy section are skipped. If a taxonomy section has no
    taxon name or common name, that value is saved as None.

    Returns:
        A list of dicts with the keys 'id', 'taxon_name', and 'common_name'.

    Raises:
        requests.HTTPError: if the API responds with an error status.
    '''
    species = []
    # NOTE(review): the single-space query presumably matches every record -- confirm against the API docs
    params = {
        'query': ' ',
        'recordtype': 'species',
        'sort': 'date',
        'perpage': 100
    }
    # The same headers are sent with every page request, so build them once
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Only the page count is needed to drive the loop
    _, total_pages = get_totals(params)
    # Loop through the total pages, downloading a page of results at a time
    for page in tqdm(range(1, total_pages + 1)):
        # Update the page value
        params['page'] = page
        # Make a request to the API (with a timeout so a stalled connection can't hang the harvest)
        response = requests.get(SEARCH_URL, params=params, headers=headers, timeout=60)
        # Stop on an HTTP error rather than trying to parse an error body as results
        response.raise_for_status()
        # Loop through the results
        for record in response.json():
            # Look for the taxonomy section of the record; it may be absent or empty
            taxonomy = record.get('taxonomy')
            if taxonomy:
                # Save species info
                species.append({
                    'id': record['id'],
                    'taxon_name': taxonomy.get('taxonName'),
                    'common_name': taxonomy.get('commonName'),
                })
    return species
# Run the harvest -- one API request to get the totals, then one per page of results
species = harvest_species()
# Load the list of dicts into a dataframe with id, taxon_name, and common_name columns
df = pd.DataFrame(species)
# Preview the first few rows
df.head()
| | id | taxon_name | common_name |
---|---|---|---|
0 | species/8583 | Melangyna viridiceps | Common Hover Fly |
1 | species/8307 | Tetractenos glaber | Smooth Toadfish |
2 | species/8815 | Salticidae | Jumping Spider |
3 | species/8456 | Hydromys chrysogaster | Common Water Rat |
4 | species/12377 | Dromaius novaehollandiae | Emu |
How many species are recorded in the Museums Victoria collection?
# shape is (rows, columns) -- there is one row for each species record saved by the harvest
df.shape
(1411, 3)
Save the list as a CSV file so we can make use of it elsewhere
# index=False leaves the dataframe's row index out of the CSV file
df.to_csv('museum-victoria-species.csv', index=False)
# Display a link to the saved file in the notebook
display(FileLink('museum-victoria-species.csv'))
Created by Tim Sherratt for the GLAM Workbench. Support me by becoming a GitHub sponsor!