Think of a word... any word.
Lots of interesting cultural heritage data, but...
Do we have the skills?
How do we deliver what’s needed, when it’s needed?
import requests
from credentials import API_KEY
# Query DigitalNZ with an empty search text and ask for the `decade` facet,
# so the response carries the overall result count plus decade facet counts.
response = requests.get(
    'http://api.digitalnz.org/v3/records.json',
    params={
        'api_key': API_KEY,
        'text': '',
        'facets': 'decade',
        'facets_per_page': 25,
    },
    # Without a timeout a stalled connection would hang the script forever.
    timeout=30,
)
# Stop here with a clear HTTP error (bad key, rate limit, outage) instead of
# failing later with a KeyError when the error payload has no 'search' key.
response.raise_for_status()
data = response.json()
print(' There are {:,} items in DigitalNZ!'.format(data['search']['result_count']))
import pandas as pd
import altair as alt
# Select Altair's 'notebook' renderer for chart display.
alt.renderers.enable('notebook')

# Turn the `decade` facet (a mapping of facet key -> count) into a two-column
# frame: the mapping's keys become `decade`, its values become `count`.
decades = data['search']['facets']['decade']
decades_df = (
    pd.Series(decades)
    .rename('count')
    .rename_axis('decade')
    .reset_index()
)

# Bar chart of counts per decade; the tooltip shows the count with
# thousands separators.
alt.Chart(decades_df).mark_bar().encode(
    x=alt.X('decade:O'),
    y=alt.Y('count:Q'),
    tooltip=alt.Tooltip('count', format=','),
)
# Search parameters for the "possum OR opossum" query. The `year` and
# `collection` facets are requested so the response includes counts for both.
# NOTE(review): the `and[display_collection][]` filter presumably limits
# results to the Papers Past collection — confirm against the DigitalNZ API docs.
params = {
    'api_key': API_KEY,
    'text': 'possum OR opossum',
    'and[display_collection][]': 'Papers Past',
    'facets': 'year,collection',
    'facets_per_page': 100,
}
response = requests.get(
    'http://api.digitalnz.org/v3/records.json',
    params=params,
    # Without a timeout a stalled connection would hang the script forever.
    timeout=30,
)
# Raise on HTTP errors now rather than hitting a KeyError further down when
# the error payload lacks the 'search' key.
response.raise_for_status()
data = response.json()
# Tabulate the `collection` facet: the mapping's keys become the `title`
# column and its values become the `count` column.
titles = data['search']['facets']['collection']
titles_df = (
    pd.Series(titles, name='count')
    .rename_axis('title')
    .reset_index()
)
# Preview the first rows (displayed when run as a notebook cell).
titles_df.head()
from urllib.parse import quote_plus

# Tabulate the `year` facet: the mapping's keys become the `year` column and
# its values become the `count` column.
years = data['search']['facets']['year']
years_df = pd.Series(years).to_frame().reset_index()
years_df.columns = ['year', 'count']

# Build one Papers Past search link per row, using that row's own year.
# Formatting the template once with the first year would give every row the
# same link. The query text is URL-encoded because it contains spaces.
# NOTE(review): assumes each `year` value is a plain year such as '1900' that
# can be dropped into the dd-mm-yyyy date parameters — confirm against the API.
encoded_query = quote_plus(params['text'])
years_df['url'] = [
    'https://paperspast.natlib.govt.nz/newspapers?query={0}&start_date=01-01-{1}&end_date=31-12-{1}'.format(encoded_query, year)
    for year in years_df['year']
]
# Preview the first rows (displayed when run as a notebook cell).
years_df.head()
# Left panel: count per year as a line with point markers. Each point links
# to the row's `url` value, and the tooltip shows the year and the count
# formatted with thousands separators. Final size is 500 x 300.
c1 = (
    alt.Chart(years_df)
    .mark_line(point=True)
    .encode(
        x=alt.X('year(year):T'),
        y=alt.Y('count:Q'),
        tooltip=[
            alt.Tooltip('year(year):T', title='year'),
            alt.Tooltip('count', format=','),
        ],
        href=alt.Href('url:N'),
    )
    .properties(width=500, height=300)
)

# Right panel: horizontal bars for rows 1-10 of the titles table.
# NOTE(review): the slice starts at 1, so the first row is left out —
# confirm this is intentional rather than an off-by-one.
c2 = (
    alt.Chart(titles_df.iloc[1:11])
    .mark_bar()
    .encode(
        x=alt.X('count:Q'),
        y=alt.Y('title:O'),
        tooltip=alt.Tooltip('count', format=','),
    )
    .properties(width=200, height=300)
)

# Show the two panels side by side.
c1 | c2