Sydney Stock Exchange – view sheets

Select a date range to view details of available sheets.

In [ ]:
# This notebook is designed to run in Voila as an app (with the code hidden).
# To launch this notebook in Voila, just select 'View > Open with Voila in New Browser Tab'
# Your browser might ask for permission to open the new tab as a popup.
In [ ]:
import pandas as pd
import ipywidgets as widgets
import arrow
import datetime
from pathlib import Path
from IPython.display import display, HTML, Image
from urllib.parse import quote_plus
import json
from collections import Counter
import re
from page_data_master import *

CLOUDSTOR_URL = 'https://cloudstor.aarnet.edu.au/plus/s/i02k4gxeEpMAUkm'
In [ ]:
def get_pages(vol_num, page_num):
    """Look up the page layout entry for a page within a volume.

    ``pages_per_vol`` maps volume keys to dictionaries of page ranges.
    A volume key is either a single number ('4') or an inclusive range
    ('1_3'). A page-range key is 'first_last' (inclusive), or 'first_*'
    for an open-ended range.

    Parameters:
        vol_num: volume number to look up.
        page_num: page number within that volume.

    Returns:
        The value stored against the first matching page range, or None
        when no volume/page range matches.
    """
    for vol_key, page_ranges in pages_per_vol.items():
        covered = [int(part) for part in vol_key.split('_')]
        if len(covered) == 2:
            # Two numbers describe an inclusive span of volumes
            covered = list(range(covered[0], covered[1] + 1))
        if vol_num not in covered:
            continue
        for range_key, layout in page_ranges.items():
            bounds = range_key.split('_')
            open_ended = bounds[1] == '*'
            if not page_num >= int(bounds[0]):
                continue
            if open_ended or page_num <= int(bounds[1]):
                return layout
    return None
In [ ]:
# One row per date
df_dates = pd.read_csv('complete_date_list.csv', parse_dates=['date'])
# One row per digitised page
df_pages = pd.read_csv('complete_page_list.csv', parse_dates=['date'])
# Left-join pages onto dates so that dates without any pages are kept,
# then order the result by date and page number
df = df_dates.merge(df_pages, how='left', on='date').sort_values(by=['date', 'page_num'])
In [ ]:
def find_vol(date):
    """Return the number of the volume whose date range contains ``date``.

    Reads 'series_list.csv' on every call and scans its rows in file order.
    Each row supplies ``start_date`` and ``end_date`` (parsed as
    'YYYY-MM-DD') and an ``Item_number`` whose trailing '-<digits>' is the
    volume number.

    Parameters:
        date: any value accepted by ``arrow.get``.

    Returns:
        The volume number as an int for the first row whose inclusive
        date range contains ``date``, or None if no row matches.

    Raises:
        AttributeError: if the matching row's ``Item_number`` does not end
            in '-<digits>' (``re.search`` returns None).
    """
    series = pd.read_csv('series_list.csv')
    # The target date is the same for every row, so parse it once up front
    find_date = arrow.get(date)
    for volume in series.itertuples():
        # Local names deliberately differ from the start_date/end_date widgets
        vol_start = arrow.get(volume.start_date, 'YYYY-MM-DD')
        vol_end = arrow.get(volume.end_date, 'YYYY-MM-DD')
        if vol_start <= find_date <= vol_end:
            # \d+ can only capture digits, so no whitespace stripping is needed
            return int(re.search(r'-(\d+)$', volume.Item_number).group(1))
    return None

def make_clickable(val):
    """Wrap ``val`` in an HTML 'Download' link that opens in a new tab."""
    # target="_blank" makes the browser open the link in a new window/tab
    return f'<a target="_blank" href="{val}">Download</a>'

def find_pages():
    """Display the sheets found for every date in the selected range.

    Reads the range from the ``start_date`` and ``end_date`` widgets,
    clears the ``results`` output widget, and then for each matching row
    of ``df_dates`` displays:

    * a heading with the formatted date,
    * the ``reason`` note when one is recorded,
    * when the date has pages: a summary of expected versus found page
      counts (highlighted in yellow when they differ) and a table of the
      pages with a download link for each image.

    Returns:
        None. All output is written to the ``results`` widget.
    """
    results.clear_output()
    with results:
        in_range = (df_dates['date'] >= pd.Timestamp(start_date.value)) & (df_dates['date'] <= pd.Timestamp(end_date.value))
        for date in df_dates.loc[in_range].itertuples():
            display(HTML(f'<h3>{arrow.get(date.date).format("D MMMM YYYY")}</h3>'))
            if pd.notnull(date.reason):
                display(HTML(f'<p>{date.reason}</p>'))
            if not date.pages > 0:
                continue

            pages = df_pages.loc[df_pages['date'] == date.date].copy(deep=False)
            # Normalise to int once so the folder and file parts of the URL
            # are always zero-padded the same way, even if the column is float
            first_page = int(pages.iloc[0]['page_num'])
            vol_num = int(pages.iloc[0]['vol_num'])

            volume_id = f'N193-{vol_num:03}'
            cloudstor_folder_url = f'{CLOUDSTOR_URL}/download?path=AU%20NBAC%20{volume_id}'
            pages['image_url'] = pages['page_num'].map(
                lambda page_num: f'{cloudstor_folder_url}&files={volume_id}_{int(page_num):04}.jpg'
            )

            # Get the expected sessions for this kind of day. Only weekday and
            # Saturday layouts are looked up; for any other day, or when no
            # layout is known for this volume/page, fall back to a placeholder
            # instead of failing part-way through the listing.
            expected = get_pages(vol_num, first_page)
            weekday = date.date.weekday()
            if weekday < 5:
                day_type = 'weekday'
            elif weekday == 5:
                day_type = 'saturday'
            else:
                day_type = None
            if expected is not None and day_type in expected:
                expected_sessions = expected[day_type][1]
            else:
                expected_sessions = 'unknown'

            summary = f'{date.expected} expected ({expected_sessions}) / {int(date.pages)} found ({pages["session"].str.cat()})'
            if date.expected != date.pages:
                # Highlight dates where the page count differs from what was expected
                summary = f'<span style="background-color: yellow;">{summary}</span>'
            display(HTML(f'<p><b>Number of pages</b>: {summary}</p>'))
            display(pages.style.format({'image_url': make_clickable}))
                
In [ ]:
def start(b):
    """Button click handler: run the page search.

    ``b`` is the argument supplied by the click callback; it is not used.
    """
    find_pages()

# Date pickers bounding the search; the defaults cover January to June 1901
start_date = widgets.DatePicker(
    value=datetime.date(1901, 1, 1),
    description='Start date',
    disabled=False
)

end_date = widgets.DatePicker(
    value=datetime.date(1901, 6, 30),
    description='End date',
    disabled=False
)

# Button that triggers the search
find = widgets.Button(
    description='Find pages',
    icon='search',
    tooltip='Click me',
    button_style='primary',  # alternatives: 'success', 'info', 'warning', 'danger' or ''
    disabled=False
)
find.on_click(start)

# Output area that find_pages() clears and writes into
results = widgets.Output()

# Layout: the two date pickers stacked, with the button beside them
date_pickers = widgets.VBox([start_date, end_date])
controls = widgets.HBox([date_pickers, find])
display(controls)
display(results)

Created by Tim Sherratt for the GLAM Workbench.