You can experiment with the DigitalNZ search API by entering values in the form below. For more information on the available parameters, see the API documentation.
import requests
import ipywidgets as widgets
from IPython.display import HTML, Markdown
import ipyvuetify as v
import json
import IPython
from pathlib import Path
import pandas as pd
import pprint
# Display code highlighting properly
def display_source(code, language='json'):
    '''
    Return `code` wrapped in an IPython Code object for syntax-highlighted display.

    Code._repr_html_ is replaced (on every call) with a version that emits
    pygments styles for the 'jp-RenderedHTML' class as well as 'output_html'.
    '''
    def _jupyterlab_repr_html_(self):
        from pygments import highlight
        from pygments.formatters import HtmlFormatter

        formatter = HtmlFormatter()
        css_rules = (
            formatter.get_style_defs(".output_html"),
            formatter.get_style_defs(".jp-RenderedHTML"),
        )
        stylesheet = "<style>{}\n{}</style>".format(*css_rules)
        markup = highlight(self.data, self._get_lexer(), formatter)
        return stylesheet + markup

    # Swap in our renderer so both CSS classes receive the highlight styles.
    IPython.display.Code._repr_html_ = _jupyterlab_repr_html_
    return IPython.display.Code(data=code, language=language)
# Endpoint that every search request is sent to (see get_results).
# NOTE(review): this is plain http, so the api_key query parameter is sent
# unencrypted — confirm whether the https endpoint can be used instead.
API_URL = 'http://api.digitalnz.org/v3/records.json'
# LOAD FACET DATA
def load_facet(facet):
    '''
    Load the values of a facet from `facets/<facet>.csv`.

    Returns the contents of the file's 'value' column as a list of strings,
    sorted case-insensitively.

    The column is read as text and only empty cells are treated as missing:
    with pandas' default parsing, values such as "NA" or "None" were turned
    into NaN (and listed as 'nan'), and a single missing cell made integer
    values such as years come back as '1990.0'. Missing cells are dropped.
    '''
    df = pd.read_csv(
        Path('facets', f'{facet}.csv'),
        dtype={'value': str},
        keep_default_na=False,
        na_values=[''],
    )
    items = [str(s) for s in df['value'].dropna().to_list()]
    return sorted(items, key=str.casefold)
# Field names offered in the 'facets' multi-select and, sorted, as the
# filterable fields in each filter row.
facets = [
'category',
'creator',
'placename',
'year',
'decade',
'century',
'language',
'content_partner',
'rights',
'collection',
'usage',
'copyright',
'dc_type',
'format',
'subject',
'primary_collection'
]
# Pre-load the values of selected facets so they can be offered in dropdown
# selects. These facets all have fewer than 1000 unique values.
dropdown_facets = (
    'category',
    'usage',
    'content_partner',
    'century',
    'language',
    'copyright',
    'primary_collection',
    'year',
    'creator',
)
facet_values = {name: load_facet(name) for name in dropdown_facets}
# API QUERY FUNCTIONS
def get_results(params):
    '''
    Retrieve and display API request results.

    Sends `params` to API_URL and writes the request parameters, the final
    request url and the response into `results_out`.

    If the request cannot be completed, or the response body is not JSON,
    a message is displayed in `results_out` rather than an exception being
    raised inside the calling event handler.
    '''
    results_out.clear_output()
    try:
        # A timeout stops a stalled connection from blocking the form forever.
        response = requests.get(API_URL, params=params, timeout=60)
    except requests.exceptions.RequestException as err:
        with results_out:
            display(Markdown('### API request failed'))
            display(display_source(str(err), 'text'))
        return
    query_url = response.url
    try:
        data = response.json()
        is_json = True
    except ValueError:
        # requests raises a ValueError subclass when the body is not valid JSON.
        data = None
        is_json = False
    with results_out:
        display(Markdown('### API request parameters\n\n'))
        display(display_source(pprint.pformat(params, indent=4), 'python'))
        display(Markdown('### API request url'))
        display(HTML(f'<a href="{query_url}">{query_url}</a>'))
        display(Markdown('### API response'))
        if is_json:
            display(display_source(json.dumps(data, indent=2)))
        else:
            display(Markdown(f'The response was not valid JSON (HTTP status {response.status_code}).'))
            display(display_source(response.text, 'text'))
def create_query(widget, event, data):
    '''
    Collect the form values into API request parameters and run the search.

    Registered as the click handler of the search button; the `widget`,
    `event` and `data` arguments are supplied by the event and are not used.
    Optional parameters are only included when the matching form field has
    a value.
    '''
    params = {
        'api_key': api_key.v_model,
    }
    if keywords.v_model:
        params['text'] = keywords.v_model
    if geo_bbox.v_model:
        params['geo_bbox'] = geo_bbox.v_model
    for f in filters:
        if f['value'].v_model and f['field'].v_model:
            key = f'{f["type"].v_model}[{f["field"].v_model}][]'
            # Collect values in a list so that several filters with the same
            # type and field are all sent (requests repeats the key for each
            # list item) instead of the last one overwriting the others.
            params.setdefault(key, []).append(f['value'].v_model)
    if select_facets.v_model:
        params['facets'] = ','.join(select_facets.v_model)
        params['facets_per_page'] = facets_per_page.v_model
        # The parameter is named 'facets_page', matching the form label.
        params['facets_page'] = facets_page_number.v_model
    if sort_select.v_model:
        params['sort'] = sort_select.v_model
    if sort_direction.v_model:
        params['direction'] = sort_direction.v_model
    params['per_page'] = per_page.v_model
    params['page'] = page_number.v_model
    get_results(params)
# FORM VARIABLES
# One dict per filter row, holding that row's 'type', 'field' and 'value'
# widgets (appended by create_filter).
filters = []
# Number of filter rows that create_filters builds.
filter_num = 2
# FORM EVENTS
def clear_query(widget, event, data):
    '''
    Reset the search form to its initial state.

    Click handler for the clear button: empties the results, restores the
    default value of every search field (the api_key field is not touched),
    and rebuilds the initial set of filter rows.
    '''
    global filters
    results_out.clear_output()

    # Fields whose default is a plain scalar, paired with that default.
    scalar_defaults = (
        (keywords, ''),
        (geo_bbox, ''),
        (page_number, 1),
        (per_page, 20),
        (facets_page_number, 1),
        (facets_per_page, 20),
        (sort_select, ''),
        (sort_direction, ''),
    )
    for field, default in scalar_defaults:
        field.value = default
        field.v_model = default

    # The multi-select gets its own fresh empty lists.
    select_facets.value = []
    select_facets.v_model = []

    # Throw away the existing filter rows and start again.
    filters = []
    filters_out.clear_output()
    create_filters()
    display_filters()
def add_filter(widget, event, data):
    '''
    Append one more filter row to the form and redraw the filters.

    Click handler for the 'Add another filter' button.
    '''
    filters_out.clear_output(wait=True)
    # The new row's index is the current number of rows.
    next_index = len(filters)
    create_filter(next_index)
    display_filters()
# FORM DISPLAY FUNCTIONS
def containerise(element):
    '''
    Wrap a widget, or a list of widgets, in a fluid vuetify Container.
    '''
    children = element if isinstance(element, list) else [element]
    return v.Container(children=children, class_='mt-0 pt-0', fluid=True)
def select_field(widget, event, data):
    '''
    Swap a filter row's value widget to suit the newly selected field.

    Change handler for a row's field select; `data` is the selected field
    name. Fields with pre-loaded values get a Select (fewer than 30 values)
    or a Combobox; other fields revert to a TextField if the row currently
    shows a Select or Combobox. The filters are redrawn only when the
    widget was replaced.
    '''
    # The row index is the trailing number of the widget id.
    row_index = int(widget.id.rsplit('-', 1)[-1])
    target = filters[row_index]
    if data in facet_values:
        choices = facet_values[data]
        if len(choices) >= 30:
            target['value'] = v.Combobox(items=choices, label='Value to filter by:', v_model='', autocomplete=True)
        else:
            target['value'] = v.Select(items=choices, label='Value to filter by:', v_model='')
    elif type(target['value']).__name__ in ('Select', 'Combobox'):
        target['value'] = v.TextField(label='Value to filter by:', v_model='')
    else:
        # Already a free-text input: nothing to replace or redraw.
        return
    display_filters()
def create_filter(id):
    '''
    Build the widgets for one filter row and append them to `filters`.

    `id` is embedded in the field select's widget id so that select_field
    can work out which row was changed.
    '''
    type_select = v.Select(
        items=['and', 'or', 'without'],
        label='Filter type:',
        class_='mr-4',
        v_model='and',
    )
    field_select = v.Select(
        items=sorted(facets),
        label='Field to filter:',
        class_='mr-4',
        v_model='',
        id=f'field-select-{id}',
    )
    value_input = v.TextField(
        label='Value to filter by:',
        v_model='',
    )
    field_select.on_event('change', select_field)
    filters.append({
        'type': type_select,
        'field': field_select,
        'value': value_input,
    })
def display_filters():
    '''
    Redraw every filter row, followed by the 'add filter' button, in `filters_out`.
    '''
    filters_out.clear_output(wait=True)
    with filters_out:
        rows = [
            v.Row(children=[
                v.Col(children=[entry['type']], cols=12, sm=3),
                v.Col(children=[entry['field']], cols=12, sm=3),
                v.Col(children=[entry['value']], cols=12, sm=6),
            ])
            for entry in filters
        ]
        display(containerise([*rows, add_filter_button]))
def create_filters():
    '''
    Create the initial `filter_num` filter rows.
    '''
    for index in range(filter_num):
        create_filter(index)
# FORM WIDGETS
# One Output area per section of the form, plus one for the search results.
(
    auth_out,
    text_out,
    filters_out,
    facets_out,
    page_params_out,
    geo_out,
    results_out,
) = (widgets.Output() for _ in range(7))
# Free-text inputs; each label is the name of the API parameter it feeds.
api_key = v.TextField(
    label='api_key',
    hint='Paste in your DigitalNZ API key',
    persistent_hint=True,
    value='',
    v_model='',
)

keywords = v.TextField(
    label='text',
    hint='Keywords to search for',
    persistent_hint=True,
    value='',
    v_model='',
)

geo_bbox = v.TextField(
    label='geo_bbox',
    hint='Provide N,W,S,E coordinates to filter by location',
    persistent_hint=True,
    v_model='',
)
# Buttons, each wired to its click handler straight after creation.
add_filter_icon = v.Icon(left=True, children=['mdi-filter-plus-outline'])
add_filter_button = v.Btn(
    children=[add_filter_icon, 'Add another filter'],
    color='',
    small=True,
)
add_filter_button.on_event('click', add_filter)

search_button = v.Btn(
    children=['Search'],
    color='primary',
    class_='mr-4 mt-10',
)
search_button.on_event('click', create_query)

clear_button = v.Btn(
    children=['Clear'],
    color='',
    class_='mt-10',
)
clear_button.on_event('click', clear_query)
# Multi-select of the facets to request alongside the search results.
select_facets = v.Select(
    label='facets',
    items=facets,
    multiple=True,
    v_model=[],
    # Fixed typo in the user-facing hint ('vales' -> 'values').
    hint='Include values for these facets',
    persistent_hint=True
)
# Paging controls for the search results, laid out side by side in one row.
per_page = v.Select(
    label='per_page',
    hint='Number of search results per page',
    persistent_hint=True,
    items=[0, 20, 100],
    v_model=20,
)

page_number = v.TextField(
    label='page',
    hint='Page number of search results',
    persistent_hint=True,
    type='number',
    v_model=1,
)

page_settings = v.Row(
    children=[
        v.Col(children=[per_page], cols=12, sm=4),
        v.Col(children=[page_number], cols=12, sm=4),
    ]
)
# Paging controls for facet values, laid out side by side in one row.
facets_per_page = v.Select(
    label='facets_per_page',
    hint='Number of facet values per page',
    persistent_hint=True,
    items=[20, 100, 350],
    v_model=20,
)

facets_page_number = v.TextField(
    label='facets_page',
    hint='Page number of facet results',
    persistent_hint=True,
    type='number',
    v_model=1,
)

facet_page_settings = v.Row(
    children=[
        v.Col(children=[facets_per_page], cols=12, sm=4),
        v.Col(children=[facets_page_number], cols=12, sm=4),
    ]
)
# Sort controls; the blank option leaves the parameter out of the request.
sort_select = v.Select(
    label='sort',
    hint='Sort results (leave blank to sort by relevance)',
    persistent_hint=True,
    items=['', 'date', 'syndication_date'],
    v_model='',
)

sort_direction = v.Select(
    label='direction',
    hint='Direction to sort',
    persistent_hint=True,
    items=['', 'asc', 'desc'],
    v_model='',
)

sort_settings = v.Row(
    children=[
        v.Col(children=[sort_select], cols=12, sm=4),
        v.Col(children=[sort_direction], cols=12, sm=4),
    ]
)
# PREPARE FORM ELEMENTS FOR DISPLAY
# Render each group of widgets into its own Output area so the groups can
# be displayed separately further down.
with auth_out:
    display(containerise(api_key))

with text_out:
    display(containerise(keywords))

# Build the initial filter rows and draw them into filters_out.
create_filters()
display_filters()

with geo_out:
    display(containerise(geo_bbox))

with page_params_out:
    display(containerise([page_settings, sort_settings]))

# NOTE(review): assumes the search/clear buttons and the divider are meant to
# render inside facets_out, directly under the facet settings — confirm.
with facets_out:
    facet_select_row = v.Row(children=[v.Col(children=[select_facets], cols=12, sm=4)])
    display(containerise([facet_select_row, facet_page_settings]))
    display(containerise([search_button, clear_button]))
    display(v.Divider())
Head over to the developers section of the DigitalNZ site to get yourself an API key.
# Show the API key input.
display(auth_out)
Enter the terms you want to search for. Leave blank to match everything. For more complex keyword searches, including wildcards, proximity modifiers, fuzzy searches, and boolean operators, see the DigitalNZ search help page.
# Show the keyword search input.
display(text_out)
Limit your results by applying filters to fields that are available as facets. Each filter has a type: `and`, `or`, or `without`. You can ask for facets below to find out what values are available for the different fields.
# Show the filter rows and the 'Add another filter' button.
display(filters_out)
Supply a geospatial bounding box to limit results to those from places within the box. The box should be in the form: `North limit (latitude), West limit (longitude), South limit (latitude), East limit (longitude)`. For example, for results from Australia, something like: `-10,113,-44,154`
# Show the bounding box input.
display(geo_out)
The `per_page` parameter changes the number of results returned by each API request. The `page` parameter asks for a particular page of results within the complete results set. By creating a loop that increments the `page` value, you can access the complete set of results.
By default, results are sorted by relevance in descending order. You can change this by supplying values for `sort` and `direction`.
# Show the paging and sort controls.
display(page_params_out)
Facets give you counts of values in particular fields. You can then use those values as filters (see above), or visualise the facet results to generate an overview of your search.
You can control the number of facet values for each API request using `facets_per_page`. If there are more than 350 values (the maximum for `facets_per_page`) you can request additional pages of values using `facets_page`.
# Show the facet controls, then the area that get_results writes into.
display(facets_out)
display(results_out)
Created by Tim Sherratt for the GLAM Workbench. Support this project by becoming a GitHub sponsor.