import pandas as pd
from IPython.display import display, HTML
import altair as alt
# Read the complete page list; the 'date' column is parsed into datetime
# values on load.
df = pd.read_csv(
    'complete_page_list.csv',
    parse_dates=['date'],
)
# Count how many rows carry each session code.
df['session'].value_counts()
A 27244 M 24538 N 20971 U 1 Name: session, dtype: int64
# Pages per year: pull the calendar year out of each row's date, then count
# how many rows fall in each year.
df['year'] = df['date'].dt.year
df_years = (
    df['year']
    .value_counts()
    .rename_axis('year')          # label the index holding the distinct years
    .reset_index(name='count')    # -> two columns: 'year', 'count'
)
# Bar chart: year as a nominal (discrete) x axis, row count as quantitative y.
alt.Chart(df_years).mark_bar().encode(
    x='year:N',
    y='count:Q',
)
# Days per year: reduce the data to its distinct dates first, so that each
# date is counted once regardless of how many rows share it.
df_dates = pd.DataFrame(df['date'].unique())
# The single unnamed column of distinct dates is labelled 0.
df_dates['year'] = df_dates[0].dt.year
df_years_dates = (
    df_dates['year']
    .value_counts()
    .rename_axis('year')          # label the index holding the distinct years
    .reset_index(name='count')    # -> two columns: 'year', 'count'
)
# Bar chart: year as a nominal (discrete) x axis, number of distinct dates
# as quantitative y.
alt.Chart(df_years_dates).mark_bar().encode(
    x='year:N',
    y='count:Q',
)
# Pages per year, split by session.
#
# `orders` fixes the stacking position of each session code within a bar
# (M first, then N, A, U), instead of leaving the segment order to the chart's
# default.
orders = {'M': 1, 'N': 2, 'A': 3, 'U': 4}

# Count rows for every (year, session) pair. Naming the counts column via
# `reset_index(name=...)` avoids a clash with the 'session' index level and
# gives the same ['year', 'session', 'count'] frame regardless of what name
# pandas assigns to the value_counts() result.
df_sessions = (
    df.groupby(by='year')['session']
    .value_counts()
    .reset_index(name='count')
)

# Attach the numeric stacking position. A session code that is missing from
# `orders` raises KeyError rather than being silently dropped.
df_sessions['order'] = df_sessions['session'].apply(lambda code: orders[code])

# Stacked bar chart: one bar per year, one coloured segment per session.
alt.Chart(df_sessions).mark_bar().encode(
    x='year:N',
    y='count:Q',
    color='session:N',
    # Sort the segments of the bars by the 'order' column
    order=alt.Order('order', sort='ascending'),
)
Created by Tim Sherratt for the GLAM Workbench.