#!/usr/bin/env python
# coding: utf-8

# # Build a DigitalNZ API search query
#
# You can experiment with the DigitalNZ search API by entering values in the form below. For more information on the available parameters, see the [API documentation](https://digitalnz.org/developers/api-docs-v3/search-records-api-v3).

# In[ ]:


import html
import json
import pprint
from pathlib import Path

import IPython
import ipyvuetify as v
import ipywidgets as widgets
import pandas as pd
import requests
from IPython.display import HTML, Markdown, display


# Display code highlighting properly
def display_source(code, language='json'):
    '''
    Wrap a string in an IPython ``Code`` object so it displays with syntax highlighting.

    Parameters:
        code: the source text to display.
        language: name of the language used to highlight the text.

    Returns:
        An ``IPython.display.Code`` instance.
    '''

    def _jupyterlab_repr_html_(self):
        from pygments import highlight
        from pygments.formatters import HtmlFormatter

        fmt = HtmlFormatter()
        # Emit the highlighting CSS for both class names inside a <style>
        # element so the rules are actually applied to the highlighted markup.
        style = "<style>{}\n{}</style>".format(
            fmt.get_style_defs(".output_html"),
            fmt.get_style_defs(".jp-RenderedHTML"),
        )
        return style + highlight(self.data, self._get_lexer(), fmt)

    # Replace _repr_html_ with our own version that adds the 'jp-RenderedHTML' class
    # in addition to 'output_html'.
    IPython.display.Code._repr_html_ = _jupyterlab_repr_html_
    return IPython.display.Code(data=code, language=language)


# NOTE(review): this is a plain-HTTP endpoint, so the api_key parameter is sent
# unencrypted -- confirm whether the HTTPS endpoint can be used instead.
API_URL = 'http://api.digitalnz.org/v3/records.json'

# Seconds to wait for the API before giving up, so the form cannot hang forever.
REQUEST_TIMEOUT = 30


# In[ ]:


# LOAD FACET DATA

def load_facet(facet):
    '''
    Read the values for a facet from ``facets/<facet>.csv``.

    Returns the contents of the file's ``value`` column as strings,
    sorted case-insensitively.
    '''
    df = pd.read_csv(Path('facets', f'{facet}.csv'))
    items = [str(s) for s in df['value'].to_list()]
    return sorted(items, key=str.casefold)


facets = [
    'category',
    'creator',
    'placename',
    'year',
    'decade',
    'century',
    'language',
    'content_partner',
    'rights',
    'collection',
    'usage',
    'copyright',
    'dc_type',
    'format',
    'subject',
    'primary_collection',
]

# Load values for selected facets so these can be used in dropdown selects
# These facets all have less than 1000 unique values
DROPDOWN_FACETS = (
    'category',
    'usage',
    'content_partner',
    'century',
    'language',
    'copyright',
    'primary_collection',
    'year',
    'creator',
)
facet_values = {name: load_facet(name) for name in DROPDOWN_FACETS}


# API QUERY FUNCTIONS

def get_results(params):
    '''
    Retrieve and display API request results.

    Sends ``params`` to ``API_URL`` and writes the parameters, the request url,
    and the response into ``results_out``. Network failures and non-JSON
    responses are reported in ``results_out`` instead of raising.
    '''
    results_out.clear_output()
    with results_out:
        display(Markdown('### API request parameters\n\n'))
        display(display_source(pprint.pformat(params, indent=4), 'python'))
        try:
            response = requests.get(API_URL, params=params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            display(Markdown('### API request failed'))
            print(err)
            return
        # Escape the url before embedding it in HTML: it contains '&'
        # separators and user-supplied text.
        query_url = html.escape(response.url)
        display(Markdown('### API request url'))
        display(HTML(f'<a href="{query_url}" target="_blank">{query_url}</a>'))
        display(Markdown('### API response'))
        try:
            data = response.json()
        except ValueError:
            # The body was not JSON (e.g. an HTML error page); show it as-is.
            display(display_source(response.text, 'text'))
            return
        display(display_source(json.dumps(data, indent=2)))


def create_query(widget, event, data):
    '''
    Build the API request parameters from the form values and run the query.

    Registered as the click handler of the Search button; the handler
    arguments are not used.
    '''
    params = {
        'api_key': api_key.v_model,
    }
    if keywords.v_model:
        params['text'] = keywords.v_model
    if geo_bbox.v_model:
        params['geo_bbox'] = geo_bbox.v_model
    for f in filters:
        if f['value'].v_model and f['field'].v_model:
            key = f'{f["type"].v_model}[{f["field"].v_model}][]'
            # Collect values in a list so that two filters on the same
            # type and field are both sent rather than the last one winning.
            params.setdefault(key, []).append(f['value'].v_model)
    if select_facets.v_model:
        params['facets'] = ','.join(select_facets.v_model)
        params['facets_per_page'] = facets_per_page.v_model
        # The parameter is named 'facets_page', matching the form label.
        params['facets_page'] = facets_page_number.v_model
    if sort_select.v_model:
        params['sort'] = sort_select.v_model
    if sort_direction.v_model:
        params['direction'] = sort_direction.v_model
    params['per_page'] = per_page.v_model
    params['page'] = page_number.v_model
    get_results(params)


# FORM VARIABLES

filters = []
filter_num = 2


# FORM EVENTS

def _reset_widget(widget, default):
    '''Set both ``value`` and ``v_model`` of a form widget to ``default``.'''
    widget.value = default
    widget.v_model = default


def clear_query(widget, event, data):
    '''
    Reset the form to its initial state.

    Clears the results, restores the default value of every query widget
    (the API key is left untouched), and rebuilds the initial set of filters.
    Registered as the click handler of the Clear button.
    '''
    results_out.clear_output()
    _reset_widget(keywords, '')
    _reset_widget(geo_bbox, '')
    _reset_widget(select_facets, [])
    _reset_widget(page_number, 1)
    _reset_widget(per_page, 20)
    _reset_widget(facets_page_number, 1)
    _reset_widget(facets_per_page, 20)
    _reset_widget(sort_select, '')
    _reset_widget(sort_direction, '')
    filters.clear()
    filters_out.clear_output()
    create_filters()
    display_filters()


def add_filter(widget, event, data):
    '''
    Append a new, empty filter row and redraw the filters.

    Registered as the click handler of the 'Add another filter' button.
    '''
    # The new filter's id is its position in the filters list.
    create_filter(len(filters))
    display_filters()


# FORM DISPLAY FUNCTIONS

def containerise(element):
    '''
    Wrap a widget, or a list of widgets, in a fluid ``v.Container``.
    '''
    if not isinstance(element, list):
        element = [element]
    return v.Container(children=element, class_='mt-0 pt-0', fluid=True)


def select_field(widget, event, data):
    '''
    Swap a filter's value widget to suit the newly selected field.

    ``data`` is the selected field name. Fields with preloaded values get a
    ``Select`` (fewer than 30 values) or a ``Combobox``; other fields fall
    back to a free-text ``TextField``.
    '''
    # The filter's position is encoded at the end of the widget id.
    index = int(widget.id.split('-')[-1])
    changed = filters[index]
    if data in facet_values:
        items = facet_values[data]
        if len(items) < 30:
            changed['value'] = v.Select(items=items, label='Value to filter by:', v_model='')
        else:
            changed['value'] = v.Combobox(items=items, label='Value to filter by:', v_model='', autocomplete=True)
        display_filters()
    elif isinstance(changed['value'], (v.Select, v.Combobox)):
        changed['value'] = v.TextField(label='Value to filter by:', v_model='')
        display_filters()


def create_filter(id):
    '''
    Create the widgets for one filter row and append them to ``filters``.

    ``id`` is the filter's position in ``filters``; it is embedded in the
    field widget's id so ``select_field`` can find the row again.
    '''
    new_filter = {}
    new_filter['type'] = v.Select(
        items=['and', 'or', 'without'],
        label='Filter type:',
        class_='mr-4',
        v_model='and'
    )
    new_filter['field'] = v.Select(
        items=sorted(facets),
        label='Field to filter:',
        class_='mr-4',
        v_model='',
        id=f'field-select-{id}'
    )
    new_filter['value'] = v.TextField(
        label='Value to filter by:',
        v_model=''
    )
    new_filter['field'].on_event('change', select_field)
    filters.append(new_filter)


def display_filters():
    '''
    Redraw every filter row, followed by the 'Add another filter' button.
    '''
    filters_out.clear_output(wait=True)
    with filters_out:
        rows = [
            v.Row(children=[
                v.Col(children=[f['type']], cols=12, sm=3),
                v.Col(children=[f['field']], cols=12, sm=3),
                v.Col(children=[f['value']], cols=12, sm=6),
            ])
            for f in filters
        ]
        display(containerise(rows + [add_filter_button]))


def create_filters():
    '''
    Create the initial ``filter_num`` filter rows.
    '''
    for f in range(0, filter_num):
        create_filter(f)


# FORM WIDGETS

auth_out = widgets.Output()
text_out = widgets.Output()
filters_out = widgets.Output()
facets_out = widgets.Output()
page_params_out = widgets.Output()
geo_out = widgets.Output()
results_out = widgets.Output()

api_key = v.TextField(
    value='',
    label='api_key',
    hint='Paste in your DigitalNZ API key',
    v_model='',
    persistent_hint=True
)

keywords = v.TextField(
    value='',
    label='text',
    hint='Keywords to search for',
    v_model='',
    persistent_hint=True
)

geo_bbox = v.TextField(
    label='geo_bbox',
    hint='Provide N,W,S,E coordinates to filter by location',
    v_model='',
    persistent_hint=True
)

add_filter_button = v.Btn(
    children=[v.Icon(left=True, children=['mdi-filter-plus-outline']), 'Add another filter'],
    color='',  # 'success', 'info', 'warning', 'danger' or ''
    small=True
)
add_filter_button.on_event('click', add_filter)

search_button = v.Btn(
    children=['Search'],
    color='primary',  # 'success', 'info', 'warning', 'danger' or ''
    class_='mr-4 mt-10'
)
search_button.on_event('click', create_query)

clear_button = v.Btn(
    children=['Clear'],
    color='',  # 'success', 'info', 'warning', 'danger' or ''
    class_='mt-10'
)
clear_button.on_event('click', clear_query)

select_facets = v.Select(
    label='facets',
    items=facets,
    multiple=True,
    v_model=[],
    hint='Include values for these facets',
    persistent_hint=True
)

per_page = v.Select(
    label='per_page',
    items=[0, 20, 100],
    v_model=20,
    hint='Number of search results per page',
    persistent_hint=True
)

page_number = v.TextField(
    label='page',
    v_model=1,
    type='number',
    hint='Page number of search results',
    persistent_hint=True
)

page_settings = v.Row(children=[v.Col(children=[per_page], sm=4, cols=12), v.Col(children=[page_number], sm=4, cols=12)])

facets_per_page = v.Select(
    label='facets_per_page',
    items=[20, 100, 350],
    v_model=20,
    hint='Number of facet values per page',
    persistent_hint=True
)

facets_page_number = v.TextField(
    label='facets_page',
    v_model=1,
    type='number',
    hint='Page number of facet results',
    persistent_hint=True
)

facet_page_settings = v.Row(children=[v.Col(children=[facets_per_page], cols=12, sm=4), v.Col(children=[facets_page_number], cols=12, sm=4)])

sort_select = v.Select(
    label='sort',
    items=['', 'date', 'syndication_date'],
    v_model='',
    hint='Sort results (leave blank to sort by relevance)',
    persistent_hint=True
)

sort_direction = v.Select(
    label='direction',
    items=['', 'asc', 'desc'],
    v_model='',
    hint='Direction to sort',
    persistent_hint=True
)

sort_settings = v.Row(children=[v.Col(children=[sort_select], cols=12, sm=4), v.Col(children=[sort_direction], cols=12, sm=4)])


# PREPARE FORM ELEMENTS FOR DISPLAY

with auth_out:
    display(containerise(api_key))

with text_out:
    display(containerise(keywords))

create_filters()
display_filters()

with geo_out:
    display(containerise(geo_bbox))

with page_params_out:
    display(containerise([page_settings, sort_settings]))

with facets_out:
    display(containerise([v.Row(children=[v.Col(children=[select_facets], cols=12, sm=4)]), facet_page_settings]))
    display(containerise([search_button, clear_button]))
    display(v.Divider())


# ### Authentication
#
# Head over to the developers section of the DigitalNZ site to [get yourself an API key](https://digitalnz.org/developers/getting-started).

# In[ ]:


display(auth_out)


# ### Search terms
#
# Enter the terms you want to search for. Leave blank to match everything. For more complex keyword searches, including wildcards, proximity modifiers, fuzzy searches, and boolean operators, see the [DigitalNZ search help page](https://digitalnz.org/help/how-to-search-digitalnz).

# In[ ]:


display(text_out)


# ### Filters
#
# Limit your results by applying filters to fields that are available as facets.
#
# * **Filter type** – how the filters are combined `and`, `or`, `without`
# * **Field to filter** – select from the list of fields available as facets
# * **Value to filter by** – if the field contains fewer than 1000 unique values, you'll be able to select a value from the dropdown list, otherwise enter a text value
#
# You can ask for facets below to find out what values are available for the different fields.

# In[ ]:


display(filters_out)


# ### Geospatial limits
#
# Supply a geospatial bounding box to limit results to those from places within the box. The box should be in the form: `North limit (latitude), West limit (longitude), South limit (latitude), East limit (longitude)`. For example, for results from Australia:
#
# * -10.6681857235, 113.338953078, -43.6345972634, 153.569469029

# In[ ]:


display(geo_out)


# ### Results parameters
#
# The `per_page` parameter changes the number of results returned by each API request. The `page` parameter asks for a particular page of results within the complete results set. By creating a loop that increments the `page` value, you can access the complete set of results.
#
# By default, results are sorted by relevance in descending order. You can change this by supplying values for `sort` and `direction`.

# In[ ]:


display(page_params_out)


# ### Include facets
#
# Facets give you counts of values in particular fields. You can then use those values as filters (see above), or visualise the facet results to generate an overview of your search.
#
# You can control the number of facet values for each API request using `facets_per_page`. If there are more than 350 values (the maximum for `facets_per_page`) you can request additional pages using `facets_page`.

# In[ ]:


display(facets_out)
display(results_out)


# ----
#
# Created by [Tim Sherratt](https://timsherratt.org/) for the [GLAM Workbench](https://glam-workbench.net/). Support this project by becoming a [GitHub sponsor](https://github.com/sponsors/wragge?o=esb).