#!/usr/bin/env python
# coding: utf-8

# # Get a list of species records from the Museums Victoria collection
# 
# The Museums Victoria collection API accepts four `recordtype` values: 'article', 'item', 'species', and 'specimen'. In this notebook we'll build a simple harvester to download all the 'species' records.
# 
# See the Museums Victoria [collection API documentation](https://collections.museumsvictoria.com.au/developers) for more information.

# ## Import what we need

# In[1]:


import requests
from tqdm.auto import tqdm
import pandas as pd
from IPython.display import display, FileLink


# In[2]:


# Base URL of the collection API's search endpoint.
# Search options are passed as query parameters on each request.
SEARCH_URL = 'https://collections.museumsvictoria.com.au/api/search'


# ## Define some functions

# In[5]:


def get_totals(params):
    '''
    Get the total results and pages from a search.

    Parameters:
        params -- dict of query parameters to send to the search API.

    Returns:
        A (total_results, total_pages) tuple of integers, read from the
        'Total-Results' and 'Total-Pages' headers of the API response.

    Raises:
        requests.HTTPError -- if the API responds with a 4xx/5xx status.
        requests.Timeout -- if the API does not respond in time.
        KeyError -- if a successful response lacks either header.
    '''
    response = requests.get(
        SEARCH_URL,
        params=params,
        headers={'User-Agent': 'Mozilla/5.0'},
        # Without a timeout a stalled connection would hang indefinitely
        timeout=60
    )
    # Surface HTTP errors directly instead of failing later with a
    # confusing KeyError when the expected headers are absent
    response.raise_for_status()
    # The total results and pages values are in the API response's headers!
    total_results = int(response.headers['Total-Results'])
    total_pages = int(response.headers['Total-Pages'])
    return (total_results, total_pages)

def harvest_species():
    '''
    Download all the species records, saving the record id, taxon name, and common name.

    Searches for every record with recordtype 'species', 100 records per page,
    and keeps only the records that have a non-empty taxonomy section.

    Returns:
        A list of dicts with the keys 'id', 'taxon_name', and 'common_name'.
        A name is None when the record's taxonomy section does not include it.

    Raises:
        requests.HTTPError -- if any request returns a 4xx/5xx status.
        requests.Timeout -- if the API does not respond in time.
    '''
    species = []
    params = {
        'query': ' ',
        'recordtype': 'species',
        'sort': 'date',
        'perpage': 100
    }
    # Only the page count is needed to drive the harvest loop
    _, total_pages = get_totals(params)
    # Reuse a single session so the connection is kept open between pages
    with requests.Session() as session:
        # Loop through the total pages, downloading a page of results at a time
        for page in tqdm(range(1, total_pages + 1)):
            # Update the page value
            params['page'] = page
            # Make a request to the API
            response = session.get(
                SEARCH_URL,
                params=params,
                headers={'User-Agent': 'Mozilla/5.0'},
                # Without a timeout a stalled connection would hang indefinitely
                timeout=60
            )
            # Stop on an HTTP error rather than trying to parse an error body
            response.raise_for_status()
            # Loop through the results
            for record in response.json():
                # Look for the taxonomy section of the record;
                # it may be missing or empty, in which case the record is skipped
                taxonomy = record.get('taxonomy')
                if taxonomy:
                    # Save species info -- use .get() so that one record lacking
                    # a name doesn't abort the whole harvest
                    species.append({
                        'id': record.get('id'),
                        'taxon_name': taxonomy.get('taxonName'),
                        'common_name': taxonomy.get('commonName')
                    })
    return species


# ## Harvest the records!

# In[ ]:


# Run the harvest. This makes one API request per page of results,
# so it may take a while; a progress bar tracks the pages downloaded.
species = harvest_species()


# ## Convert to a dataframe and save as a CSV

# In[7]:


# Build a table with one row per harvested species record
df = pd.DataFrame(species)
# Preview the first few rows (shown as the cell output in a notebook)
df.head()


# How many species are recorded in the Museums Victoria collection?

# In[8]:


# (rows, columns) of the dataframe -- the row count is the number of species records saved
df.shape


# Save the list as a CSV file so we can make use of it elsewhere

# In[9]:


# Write the dataframe to a CSV file, leaving out the dataframe's index column
df.to_csv('museum-victoria-species.csv', index=False)
# Display a link to the saved file in the notebook
display(FileLink('museum-victoria-species.csv'))


# ----
# 
# Created by [Tim Sherratt](https://timsherratt.org/) for the [GLAM Workbench](https://glam-workbench.github.io/).  Support me by becoming a [GitHub sponsor](https://github.com/sponsors/wragge)!