Select a date range to view details of available sheets.
# This notebook is designed to run in Voila as an app (with the code hidden).
# To launch this notebook in Voila, just select 'View > Open with Voila in New Browser Tab'
# Your browser might ask for permission to open the new tab as a popup.
import datetime
import re
import arrow
import ipywidgets as widgets
import pandas as pd
from IPython.display import HTML, display
from page_data_master import pages_per_vol
CLOUDSTOR_URL = "https://cloudstor.aarnet.edu.au/plus/s/i02k4gxeEpMAUkm"
def get_pages(vol_num, page_num):
    """Look up the expected-pages entry for a page within a volume.

    ``pages_per_vol`` is keyed by volume: either a single number (``"5"``) or
    an inclusive span (``"5_9"``). Each value maps page ranges of the form
    ``"first_last"`` -- or ``"first_*"`` for an open-ended range -- to the
    entry that applies to those pages.

    Returns the entry for the first volume key and page range that contain
    ``vol_num`` and ``page_num``, or ``None`` when nothing matches.
    """
    for vol_key, page_ranges in pages_per_vol.items():
        bounds = [int(part) for part in vol_key.split("_")]
        # A two-part key is an inclusive span of volume numbers.
        if len(bounds) == 2:
            covered = range(bounds[0], bounds[1] + 1)
        else:
            covered = bounds
        if vol_num not in covered:
            continue
        for range_key, entry in page_ranges.items():
            limits = range_key.split("_")
            if limits[1] == "*":
                # Open-ended range: only the lower bound applies.
                matched = page_num >= int(limits[0])
            else:
                matched = int(limits[0]) <= page_num <= int(limits[1])
            if matched:
                return entry
    return None
# Load the list of dates, parsing the "date" column into timestamps.
df_dates = pd.read_csv("complete_date_list.csv", parse_dates=["date"])

# Load the list of pages the same way.
df_pages = pd.read_csv("complete_page_list.csv", parse_dates=["date"])

# Left-join pages onto dates so that dates without any pages are kept,
# then order the combined rows by date and page number.
df = df_dates.merge(df_pages, how="left", on="date")
df = df.sort_values(by=["date", "page_num"])
def find_vol(date):
    """Return the number of the volume whose date range contains ``date``.

    Loads ``series_list.csv`` and scans its rows in order. For the first row
    whose ``start_date``..``end_date`` span (both inclusive, ``YYYY-MM-DD``)
    contains ``date``, the digits following the last hyphen of that row's
    ``Item_number`` are returned as the volume number.

    Args:
        date: Any value accepted by ``arrow.get``.

    Returns:
        The volume number as an ``int``, or ``None`` if no row covers the date.

    Raises:
        AttributeError: If the matching row's ``Item_number`` does not end
            with a hyphen followed by digits (the regex finds no match).
    """
    series = pd.read_csv("series_list.csv")
    # The target date is the same for every row, so parse it once up front
    # instead of once per row.
    find_date = arrow.get(date)
    for volume in series.itertuples():
        range_start = arrow.get(volume.start_date, "YYYY-MM-DD")
        range_end = arrow.get(volume.end_date, "YYYY-MM-DD")
        if range_start <= find_date <= range_end:
            # \d+ can only capture digits, so the capture needs no stripping.
            return int(re.search(r"-(\d+)$", volume.Item_number).group(1))
    return None
def make_clickable(val):
    """Wrap ``val`` (a URL) in an HTML "Download" link."""
    # target="_blank" makes the browser open the link in a new window.
    return f'<a target="_blank" href="{val}">Download</a>'
def find_pages():
    """Render details of the sheets available for the selected date range.

    Reads the range from the ``start_date`` and ``end_date`` date pickers
    (both ends inclusive), clears the ``results`` output widget and then, for
    every matching row of ``df_dates``, displays inside ``results``:

    * a heading with the date;
    * the row's ``reason`` text, when it is not null;
    * for dates with at least one page, a summary of expected versus found
      pages (highlighted in yellow when the counts differ) followed by a
      table of that date's rows from ``df_pages`` with a download link per
      page.

    Takes no arguments and returns nothing; all output goes to ``results``.
    """
    results.clear_output()
    with results:
        dates = df_dates.loc[
            (df_dates["date"] >= pd.Timestamp(start_date.value))
            & (df_dates["date"] <= pd.Timestamp(end_date.value))
        ]
        for date in dates.itertuples():
            display(HTML(f'<h3>{arrow.get(date.date).format("D MMMM YYYY")}</h3>'))
            if pd.notnull(date.reason):
                display(HTML(f"<p>{date.reason}</p>"))
            # Nothing more to show for dates without pages.
            if not date.pages > 0:
                continue

            pages = df_pages.loc[df_pages["date"] == date.date].copy(deep=False)
            # Convert once so the folder URL and the file names are always
            # formatted from the same integer values.
            first_page = int(pages.iloc[0]["page_num"])
            vol_num = int(pages.iloc[0]["vol_num"])
            cloudstor_folder_url = (
                f"{CLOUDSTOR_URL}/download?path=AU%20NBAC%20N193-{vol_num:03}"
            )
            pages["image_url"] = [
                f"{cloudstor_folder_url}&files=N193-{vol_num:03}_{int(page_num):04}.jpg"
                for page_num in pages["page_num"]
            ]

            # Get the expected sessions for this kind of day. Expectations
            # only exist for weekdays and Saturdays; for any other day, or
            # when get_pages finds no entry, say so explicitly rather than
            # failing or reusing a value left over from an earlier date.
            expected = get_pages(vol_num, first_page)
            weekday = date.date.weekday()
            if expected is None or weekday > 5:
                expected_sessions = "unknown"
            else:
                expected_pages = expected["weekday" if weekday < 5 else "saturday"]
                expected_sessions = expected_pages[1]

            summary = (
                f"{date.expected} expected ({expected_sessions}) / "
                f'{int(date.pages)} found ({pages["session"].str.cat()})'
            )
            if date.expected != date.pages:
                # Highlight dates where the page count is not what was expected.
                summary = f'<span style="background-color: yellow;">{summary}</span>'
            display(HTML(f"<p><b>Number of pages</b>: {summary}</p>"))
            display(pages.style.format({"image_url": make_clickable}))
def start(b):
    """Handle a click on the "Find pages" button by refreshing the results.

    ``b`` is the argument supplied by the button's click callback; it is
    not used.
    """
    find_pages()
# Date pickers for the two ends of the range, preset to January-June 1901.
start_date = widgets.DatePicker(
    value=datetime.date(1901, 1, 1),
    description="Start date",
    disabled=False,
)
end_date = widgets.DatePicker(
    value=datetime.date(1901, 6, 30),
    description="End date",
    disabled=False,
)

# Output area that find_pages() clears and writes into.
results = widgets.Output()

# Button that triggers the search.
find = widgets.Button(
    description="Find pages",
    icon="search",
    tooltip="Click me",
    button_style="primary",  # 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)
find.on_click(start)

# Layout: the pickers stacked vertically, the button beside them, and the
# results underneath.
display(
    widgets.HBox(
        children=[
            widgets.VBox(children=[start_date, end_date]),
            find,
        ]
    )
)
display(results)
Created by Tim Sherratt for the GLAM Workbench.