import altair as alt
import pandas as pd
# Load the page list; "date" is parsed to datetimes so the `.dt` accessor
# can be used on it further down.
df = pd.read_csv(
    "complete_page_list.csv",
    parse_dates=["date"],
)
# Tally the rows by their "session" code (displayed as notebook cell output).
df["session"].value_counts()
# Output: A 27244, M 24538, N 20971, U 1 (Name: session, dtype: int64)
# Tag every row with the calendar year taken from its date.
df["year"] = df["date"].dt.year
# Count rows per year and flatten the result into a two-column
# ("year", "count") frame for charting.
df_years = (
    df["year"]
    .value_counts()
    .rename_axis("year")
    .reset_index(name="count")
)
# Pages per year
alt.Chart(df_years).mark_bar().encode(x="year:N", y="count:Q")
# Keep each distinct date once (in column 0) so a day is only counted a
# single time, however many rows share it.
df_dates = pd.DataFrame({0: df["date"].unique()})
df_dates["year"] = df_dates[0].dt.year
# Count the distinct dates falling in each year as a ("year", "count") frame.
df_years_dates = (
    df_dates["year"]
    .value_counts()
    .rename_axis("year")
    .reset_index(name="count")
)
# Days per year
alt.Chart(df_years_dates).mark_bar().encode(x="year:N", y="count:Q")
# Stacking position of each session code within a bar (lower = drawn first).
orders = {"M": 1, "N": 2, "A": 3, "U": 4}

# Count rows per (year, session) pair. Naming the value column through
# `reset_index(name=...)` avoids depending on whatever name `value_counts`
# gives its result, which is not the same across pandas versions.
df_sessions = (
    df.groupby(by="year")["session"]
    .value_counts()
    .reset_index(name="count")
)

# Attach the stacking position for each session code. `map` does the
# dictionary lookup directly; a code missing from `orders` becomes NaN
# rather than raising.
df_sessions["order"] = df_sessions["session"].map(orders)

# Pages per year, split by session
alt.Chart(df_sessions).mark_bar().encode(
    x="year:N",
    y="count:Q",
    color="session:N",
    order=alt.Order(
        # Sort the segments of the bars by this field
        "order",
        sort="ascending",
    ),
)
# Created by Tim Sherratt for the GLAM Workbench.