Select a date range to view details of available sheets.
# This notebook is designed to run in Voila as an app (with the code hidden).
# To launch this notebook in Voila, just select 'View > Open with Voila in New Browser Tab'
# Your browser might ask for permission to open the new tab as a popup.
import pandas as pd
import ipywidgets as widgets
import arrow
import datetime
from pathlib import Path
from IPython.display import display, HTML, Image
from urllib.parse import quote_plus
import json
from collections import Counter
import re
from page_data_master import *
# Base URL of the CloudStor share holding the page images.
# find_pages() appends '/download?path=...&files=...' to it to build the
# per-page download links.
CLOUDSTOR_URL = 'https://cloudstor.aarnet.edu.au/plus/s/i02k4gxeEpMAUkm'
def get_pages(vol_num, page_num):
    """Return the ``pages_per_vol`` entry matching a volume and page number.

    ``pages_per_vol`` is not defined in this file (presumably it comes from
    the ``page_data_master`` star import). Its keys are either a single
    volume number (``'12'``) or an inclusive volume range joined by an
    underscore (``'12_15'``). Each value is a mapping whose keys are page
    ranges: ``'start_end'`` (inclusive) or ``'start_*'`` (open ended).

    Args:
        vol_num: Volume number to look up.
        page_num: Page number within that volume.

    Returns:
        The value stored against the first matching page range, or ``None``
        when no volume/page range matches.
    """
    for vol_key, page_map in pages_per_vol.items():
        covered = [int(part) for part in vol_key.split('_')]
        if len(covered) == 2:
            # Two numbers describe an inclusive range of volumes
            covered = list(range(covered[0], covered[1] + 1))
        if vol_num not in covered:
            continue
        for range_key, layout in page_map.items():
            bounds = range_key.split('_')
            upper = bounds[1]
            # '*' as the upper bound means "this page and everything after"
            if page_num >= int(bounds[0]) and (upper == '*' or page_num <= int(upper)):
                return layout
    return None
# Read the list of dates, parsing the 'date' column as datetimes
df_dates = pd.read_csv('complete_date_list.csv', parse_dates=['date'])
# Read the list of pages, parsing its 'date' column the same way
df_pages = pd.read_csv('complete_page_list.csv', parse_dates=['date'])
# Left-join pages onto dates so dates without any pages are kept,
# then order the combined rows by date and page number
df = df_dates.merge(df_pages, how='left', on='date')
df = df.sort_values(by=['date', 'page_num'])
def find_vol(date):
    """Return the volume number whose date range contains ``date``.

    Reads ``series_list.csv`` and checks each row's ``start_date`` and
    ``end_date`` (both parsed as ``YYYY-MM-DD``) against ``date``. The
    volume number is the run of digits after the final hyphen in the row's
    ``Item_number``.

    Args:
        date: Any value accepted by ``arrow.get``.

    Returns:
        The volume number as an ``int``, or ``None`` if no row's date range
        (inclusive at both ends) contains ``date``.

    Raises:
        AttributeError: If a matching row's ``Item_number`` does not end
            with a hyphen followed by digits.
    """
    series = pd.read_csv('series_list.csv')
    # The target date does not change between rows, so parse it once
    find_date = arrow.get(date)
    for volume in series.itertuples():
        start = arrow.get(volume.start_date, 'YYYY-MM-DD')
        end = arrow.get(volume.end_date, 'YYYY-MM-DD')
        if start <= find_date <= end:
            return int(re.search(r'-(\d+)$', volume.Item_number).group(1))
    return None
def make_clickable(val):
    """Wrap a URL in an HTML 'Download' link that opens in a new window.

    Args:
        val: The link target; it is inserted into the ``href`` as-is.

    Returns:
        An ``<a>`` element string with ``target="_blank"``.
    """
    # target _blank to open new window
    return f'<a target="_blank" href="{val}">Download</a>'
def find_pages():
    """Show the pages available for every date in the selected range.

    Reads the ``start_date`` and ``end_date`` pickers and, for each row of
    ``df_dates`` whose date falls in that inclusive range, renders into the
    ``results`` output widget:

    * a heading with the formatted date;
    * the row's ``reason``, when it is not null;
    * when the row has pages: a summary of expected versus found pages
      (highlighted in yellow when the two counts differ) and a table of
      the pages with a download link for each image.

    The expected sessions come from ``get_pages``: its ``'weekday'`` entry
    for Monday to Friday and its ``'saturday'`` entry for Saturday. When
    there is no applicable entry (for example on a Sunday, or when
    ``get_pages`` finds nothing) the sessions are shown as ``unknown``.
    """
    results.clear_output()
    with results:
        range_start = pd.Timestamp(start_date.value)
        range_end = pd.Timestamp(end_date.value)
        in_range = (df_dates['date'] >= range_start) & (df_dates['date'] <= range_end)
        for date in df_dates.loc[in_range].itertuples():
            display(HTML(f'<h3>{arrow.get(date.date).format("D MMMM YYYY")}</h3>'))
            if pd.notnull(date.reason):
                display(HTML(f'<p>{date.reason}</p>'))
            if not date.pages > 0:
                continue

            pages = df_pages.loc[df_pages['date'] == date.date].copy(deep=False)
            # Convert once so the folder name and file names are zero-padded
            # from the same integer value
            first_page = int(pages.iloc[0]['page_num'])
            vol_num = int(pages.iloc[0]['vol_num'])
            cloudstor_folder_url = f'{CLOUDSTOR_URL}/download?path=AU%20NBAC%20N193-{vol_num:03}'
            pages['image_url'] = [
                f'{cloudstor_folder_url}&files=N193-{vol_num:03}_{int(page_num):04}.jpg'
                for page_num in pages['page_num']
            ]

            # Get the expected number of pages
            expected = get_pages(vol_num, first_page) or {}
            weekday = date.date.weekday()
            if weekday < 5:
                expected_pages = expected.get('weekday')
            elif weekday == 5:
                expected_pages = expected.get('saturday')
            else:
                # Sunday has no entry; never reuse the previous date's value
                expected_pages = None
            expected_sessions = expected_pages[1] if expected_pages is not None else 'unknown'

            found_sessions = pages['session'].str.cat()
            summary = (
                f'{date.expected} expected ({expected_sessions}) / '
                f'{int(date.pages)} found ({found_sessions})'
            )
            if date.expected != date.pages:
                # Highlight dates where the page count is not what was expected
                summary = f'<span style="background-color: yellow;">{summary}</span>'
            display(HTML(f'<p><b>Number of pages</b>: {summary}</p>'))
            display(pages.style.format({'image_url': make_clickable}))
def start(b):
    """Click handler for the 'Find pages' button.

    ``b`` is the argument supplied by the button's ``on_click`` callback;
    it is not used. The handler simply refreshes the results.
    """
    find_pages()
# Output area that find_pages() clears and then writes into
results = widgets.Output()

# Date pickers that bound the search
start_date = widgets.DatePicker(
    value=datetime.date(1901, 1, 1),
    description='Start date',
    disabled=False,
)
end_date = widgets.DatePicker(
    value=datetime.date(1901, 6, 30),
    description='End date',
    disabled=False,
)

# Button that triggers the search via start()
find = widgets.Button(
    icon='search',
    description='Find pages',
    tooltip='Click me',
    button_style='primary',  # 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)
find.on_click(start)

# Lay out the pickers stacked vertically with the button beside them,
# followed by the results area
display(
    widgets.HBox([
        widgets.VBox([start_date, end_date]),
        find,
    ])
)
display(results)
Created by Tim Sherratt for the GLAM Workbench.