#!/usr/bin/env python
# coding: utf-8

# # Get a list of species records from the Museums Victoria collection
#
# The Museums Victoria collection API accepts four `recordtype` values: 'article', 'item', 'species', and 'specimen'. In this notebook we'll build a simple harvester to download all the 'species' records.
#
# See the Museums Victoria [collection API documentation](https://collections.museumsvictoria.com.au/developers) for more information.

# ## Import what we need

# In[1]:


import requests
from tqdm.auto import tqdm
import pandas as pd
from IPython.display import display, FileLink


# In[2]:


# Base search url
SEARCH_URL = 'https://collections.museumsvictoria.com.au/api/search'

# Headers sent with every API request
REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0'}

# Seconds to wait for the API before giving up, so that a stalled
# connection can't hang the harvest indefinitely
REQUEST_TIMEOUT = 60


# ## Define some functions

# In[5]:


def _search(params):
    '''
    Send a search request to the API and return the response.

    Parameters:
        params: dict of query string parameters for the search endpoint.

    Returns:
        The requests.Response object for a successful request.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        requests.Timeout: if the API doesn't answer within REQUEST_TIMEOUT seconds.
    '''
    response = requests.get(
        SEARCH_URL,
        params=params,
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT
    )
    # Fail here with a clear HTTP error, rather than later with a confusing
    # KeyError or JSON decoding error on an error page
    response.raise_for_status()
    return response


def get_totals(params):
    '''
    Get the total results and pages from a search.

    Parameters:
        params: dict of query string parameters for the search endpoint.

    Returns:
        A (total_results, total_pages) tuple of ints.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        KeyError: if the response doesn't include the totals headers.
    '''
    response = _search(params)
    # The total results and pages values are in the API response's headers!
    total_results = int(response.headers['Total-Results'])
    total_pages = int(response.headers['Total-Pages'])
    return (total_results, total_pages)


def harvest_species():
    '''
    Download all the species records, saving the record id, taxon name, and common name.

    Records without a taxonomy section are skipped. If a taxonomy section has
    no taxon name or common name, that value is saved as None.

    Returns:
        A list of dicts with the keys 'id', 'taxon_name', and 'common_name'.

    Raises:
        requests.HTTPError: if any request to the API fails.
    '''
    species = []
    params = {
        'query': ' ',
        'recordtype': 'species',
        'sort': 'date',
        'perpage': 100
    }
    # Only the number of pages is needed to drive the harvest
    _, total_pages = get_totals(params)
    # Loop through the total pages, downloading a page of results at a time
    for page in tqdm(range(1, total_pages + 1)):
        # Make a request to the API for this page of results
        response = _search({**params, 'page': page})
        # Loop through the results
        for record in response.json():
            # Look for the taxonomy section of the record
            taxonomy = record.get('taxonomy')
            if taxonomy:
                # Save species info -- .get() means one incomplete record
                # can't abort a harvest that's already part-way through
                species.append({
                    'id': record['id'],
                    'taxon_name': taxonomy.get('taxonName'),
                    'common_name': taxonomy.get('commonName')
                })
    return species


# ## Harvest the records!
# In[ ]:


# Run the harvester to collect every species record
species = harvest_species()


# ## Convert to a dataframe and save as a CSV

# In[7]:


# One row per species, one column per saved field
df = pd.DataFrame(data=species)
# Peek at the first few rows
df.head(5)


# How many species are recorded in the Museums Victoria collection?

# In[8]:


# (number of rows, number of columns)
df.shape


# Save the list as a CSV file so we can make use of it elsewhere

# In[9]:


csv_path = 'museum-victoria-species.csv'
# Leave out the dataframe's index -- it's just a row counter
df.to_csv(csv_path, index=False)
# Show a link to the saved file
display(FileLink(csv_path))


# ----
#
# Created by [Tim Sherratt](https://timsherratt.org/) for the [GLAM Workbench](https://glam-workbench.github.io/). Support me by becoming a [GitHub sponsor](https://github.com/sponsors/wragge)!