Sydney Stock Exchange trading activity, 1901–1950

These charts show the amount of trading activity recorded each day by the Sydney Stock Exchange. At the end of each trading session, details of any activity were recorded by hand on printed sheets, under the headings 'Buyers', 'Sellers', and 'Business done' or 'Sales' (the column headings changed over time). Using computer vision technology and the Amazon Textract service, the printed labels and handwritten entries have been extracted from the sheets and compiled into a tabular format. This data is far from perfect, and is still being checked and cleaned. Some values will be missing altogether, particularly on pages with dense, overlapping annotations. However, even with these caveats, the raw number of handwritten entries in the 'Buyers', 'Sellers', and 'Sales' columns can show patterns in trading activity over time.

The charts show the proportion of stocks each day that have prices recorded. Click on any of the points on the charts to view activity on a particular date.

In [1]:
%%capture
import altair as alt
import pandas as pd
from IPython.display import display

alt.data_transformers.enable("default")


def blank_href():
    return {"usermeta": {"embedOptions": {"loader": {"target": "_blank"}}}}


# register the custom theme under a chosen name
alt.themes.register("blank_href", blank_href)

# enable the newly registered theme
alt.themes.enable("blank_href")

CLOUDSTOR_SHARE_LINK = "https://cloudstor.aarnet.edu.au/plus/s/RwRrCpisBac7N38"
In [2]:
def calculate_activity_by_day(year):
    # df = pd.read_csv(Path('compiled-ocr-data', f'{year}-textract.csv'), parse_dates=['date'])
    df = pd.read_csv(
        f"{CLOUDSTOR_SHARE_LINK}/download?files={year}-textract.csv",
        parse_dates=["date"],
        dtype={
            "label": "object",
            "buyers": "object",
            "sellers": "object",
            "sales": "object",
        },
    )
    dfs = []
    total = df["date"].value_counts().to_frame()
    total.columns = ["total"]
    # any_activity = df.loc[(df['sales'].notnull()) | (df['buyers'].notnull()) | (df['sellers'].notnull())]['date'].value_counts().to_frame()
    any_activity = (
        df.loc[(df["buyers"].notnull()) | (df["sellers"].notnull())]["date"]
        .value_counts()
        .to_frame()
    )
    any_activity.columns = ["count"]
    any_activity["activity"] = "any"
    any_totals = pd.concat([any_activity, total], axis=1)
    r = pd.date_range(
        start=f"{any_totals.index.min().year}-01-01",
        end=f"{any_totals.index.max().year}-12-31",
    )
    any_totals = any_totals.reindex(r)
    any_totals["activity"].fillna("any", inplace=True)
    any_totals.fillna(0, inplace=True)
    dfs.append(any_totals)
    buyers = df.loc[df["buyers"].notnull()]["date"].value_counts().to_frame()
    buyers.columns = ["count"]
    buyers["activity"] = "buyers"
    dfs.append(pd.concat([buyers, total], axis=1))
    sellers = df.loc[df["sellers"].notnull()]["date"].value_counts().to_frame()
    sellers.columns = ["count"]
    sellers["activity"] = "sellers"
    dfs.append(pd.concat([sellers, total], axis=1))
    sales = df.loc[df["sales"].notnull()]["date"].value_counts().to_frame()
    sales.columns = ["count"]
    sales["activity"] = "sales"
    dfs.append(pd.concat([sales, total], axis=1))
    combined = pd.concat(dfs).rename_axis("date").reset_index()
    # combined['year'] = year
    # display(combined)
    return combined
In [3]:
for year in range(1901, 1951):
    df = calculate_activity_by_day(year)

    chart = (
        alt.Chart(df.loc[df["activity"].isin(["buyers", "sellers", "sales"])])
        .transform_calculate(
            ratio="datum.count / datum.total",
            iso_date='timeFormat(datum.date, "%Y-%m-%d")',
            url="https://sydney-stock-exchange-xqtkxtd5za-ts.a.run.app/stock_exchange/stocks?date__exact="
            + alt.datum.iso_date
            + "&"
            + alt.datum.activity
            + "__notblank=1",
        )
        .mark_point()
        .encode(
            x=alt.X("date:T", axis=alt.Axis(grid=False, format="%B"), title="Date"),
            y=alt.Y(
                "ratio:Q",
                axis=alt.Axis(format="%", title="Percentage of stocks active"),
            ),
            color=alt.Color(
                "activity:N",
                sort=["buyers", "sellers", "sales"],
                legend=alt.Legend(title="Activity"),
            ),
            href="url:N",
            tooltip=[
                alt.Tooltip("date", format="%a, %e %b %Y"),
                alt.Tooltip("ratio:Q", format=".2%", title="percent of stocks active"),
                alt.Tooltip("count", title="number of stocks active"),
                "activity",
            ],
        )
        .interactive()
    )

    chart2 = (
        alt.Chart(df.loc[df["activity"] == "any"])
        .transform_calculate(
            ratio="datum.count / datum.total",
        )
        .mark_area(opacity=0.2, color="grey")
        .encode(
            x="date:T",
            y="ratio:Q",
            tooltip=[
                alt.Tooltip("date", format="%a, %e %b %Y"),
                alt.Tooltip("ratio:Q", format=".2%", title="percent of stocks active"),
                "count",
                "activity:N",
            ],
        )
        .interactive()
    )

    display(
        (chart2 + chart)
        .configure_title(fontSize=20)
        .properties(width=800, height=300, title=str(year), padding=20)
    )