Papers Past newspapers in DigitalNZ

A subset of the digitised newspapers in Papers Past can be searched through DigitalNZ. Using data from the DigitalNZ API we can look at what's available.

In [ ]:
# This cell just sets up some stuff that we'll need later

from pathlib import Path
from urllib.parse import quote

import altair as alt
import pandas as pd
import requests
from IPython.display import display, FileLink
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Use Altair's default data transformer and switch off its row limit
alt.data_transformers.enable('default')
alt.data_transformers.disable_max_rows()

# Shared session that retries requests answered with a 502, 503 or 504 status
s = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
# Adapters are selected by URL prefix, and API_URL in this notebook is an
# http:// address, so the retrying adapter has to be mounted for both schemes;
# an https-only mount would never be used for the API calls.
s.mount('https://', HTTPAdapter(max_retries=retries))
s.mount('http://', HTTPAdapter(max_retries=retries))

# Make links in Altair open in a new tab
def blank_href():
    """Return the theme config that makes chart links open in a new tab.

    A fresh dictionary is built on every call.
    """
    loader_options = {'target': '_blank'}
    embed_options = {'loader': loader_options}
    return {"usermeta": {"embedOptions": embed_options}}

# Register the blank_href function as an Altair theme named 'blank_href'
alt.themes.register('blank_href', blank_href)

# Enable the newly registered theme
alt.themes.enable('blank_href')

# Endpoint that the requests in this notebook are sent to.
# NOTE(review): this is a plain http:// address and the API key is passed as a
# query parameter -- confirm whether the https:// endpoint can be used instead.
API_URL = 'http://api.digitalnz.org/v3/records.json'
In [ ]:
# Paste your API key between the quotes.
# Any spaces picked up at the beginning or end while copying are stripped for you.
API_KEY = 'YOUR API KEY'.strip()
In [ ]:
# Query parameters sent with the facet requests in this notebook.
# Built key by key; insertion order matches the order the keys are sent in.
params = {}
# Restrict results to the Papers Past primary collection
params['and[primary_collection][]'] = 'Papers Past'
# Facets to request, and the number of facet values per page
params['facets'] = 'year,collection,placename'
params['facets_per_page'] = 350
# Zero records per page -- the cells that use this only read the facet counts
params['per_page'] = 0
params['api_key'] = API_KEY

Total number of articles per year

Hover for details. Click to search for articles in DigitalNZ.

In [ ]:
# Fetch the facet counts for the whole of Papers Past
response = s.get(API_URL, params=params)
# Stop here with a clear HTTP error (for example if the API key is rejected)
# rather than failing later when the expected keys are missing from the JSON
response.raise_for_status()
data = response.json()
In [ ]:
# One record per value of the year facet: the year as an int plus its article count
years = [
    {'year': int(year_label), 'total': article_count}
    for year_label, article_count in data['search']['facets']['year'].items()
]
# The collection facet maps collection names to counts
titles = data['search']['facets']['collection']
# Drop the 'Papers Past' entry if it is present; do nothing otherwise
titles.pop('Papers Past', None)
In [ ]:
# Fill in any missing years.
# Years absent from the facet get a total of 0 (not NaN), which matches how the
# per-newspaper frames are filled and keeps the 'total' column as integers.
df_years = pd.DataFrame(years).set_index('year')
min_year = int(df_years.index.min())
max_year = int(df_years.index.max())
# Every year from the first to the last, inclusive; range() is already ordered
idx = list(range(min_year, max_year + 1))
df_years = df_years.reindex(idx, fill_value=0).reset_index()

# Add a url to search in DigitalNZ
df_years['url'] = df_years['year'].apply(lambda x: f'https://digitalnz.org/records?i[primary_collection]=Papers%20Past&i[year]={x}#/')
In [ ]:
# Bar chart of article totals per year; each bar links to its DigitalNZ search
alt.Chart(df_years).mark_bar().encode(
    x=alt.X('year:O'),
    y=alt.Y('total:Q'),
    href=alt.Href('url:N'),
    tooltip=[
        alt.Tooltip('year:N'),
        alt.Tooltip('total:Q', format=','),
    ],
).properties(
    width=800,
)

Number of articles in each newspaper per year

Hover for details. Click to search for articles in DigitalNZ.

In [ ]:
title_dfs = []

# Every newspaper is laid out over the same span of years as the overall
# year facet, so this does not need recomputing inside the loop.
idx = list(range(min_year, max_year + 1))

# Loop through titles to download year facets
for title in titles:
    # Add the title filter to a copy, so the shared `params` dict is not left
    # restricted to whichever title happened to be requested last
    title_params = {**params, 'and[collection][]': title}
    title_response = s.get(API_URL, params=title_params)
    # Fail with a clear HTTP error instead of a KeyError on the JSON below
    title_response.raise_for_status()
    year_facets = title_response.json()['search']['facets']['year']
    # A Series keyed by year also works when a title has no year facets at all
    totals = pd.Series(
        {int(year_label): article_count for year_label, article_count in year_facets.items()},
        name='total',
        dtype='int64',
    )
    # Fill in missing years
    df_title = totals.reindex(idx, fill_value=0).rename_axis('year').reset_index()
    # Add newspaper name
    df_title['newspaper'] = title
    # Add url to search in DigitalNZ. The title is percent-encoded so that
    # spaces or '&' in a newspaper name cannot break the query string.
    collection = quote(title, safe='')
    df_title['url'] = (
        'https://digitalnz.org/records?i[primary_collection]=Papers%20Past&i[year]='
        + df_title['year'].astype(str)
        + f'&i[collection]={collection}#/'
    )
    title_dfs.append(df_title)
In [ ]:
# Stack the per-newspaper frames into one. Each frame carries its own 0..n
# index, so renumber the rows to avoid repeated index labels in the result.
df_all = pd.concat(title_dfs, ignore_index=True)
In [ ]:
# One small bar chart per newspaper, stacked in a single column.
# Each facet gets its own y scale, and the border around each view is removed.
alt.Chart(df_all).mark_bar().encode(
    x='year:O',
    y=alt.Y('total:Q', title=None),
    facet=alt.Facet('newspaper:N', columns=1, title=None),
    href=alt.Href('url:N'),
    tooltip=[
        alt.Tooltip('newspaper'),
        alt.Tooltip('year'),
        alt.Tooltip('total', format=','),
    ],
).properties(
    width=800,
    height=50,
).resolve_scale(
    y='independent',
).configure_view(
    strokeWidth=0,
)

Created by Tim Sherratt for the GLAM Workbench. Support this project by becoming a GitHub sponsor.