Stock exchange – visualising page data

In [1]:
import pandas as pd
from IPython.display import display, HTML
import altair as alt
In [2]:
# Load the page list; the 'date' column is converted to datetimes while reading.
df = pd.read_csv(
    filepath_or_buffer='complete_page_list.csv',
    parse_dates=['date'],
)
In [3]:
# Tally how many rows carry each session code, most frequent first.
session_counts = df['session'].value_counts()
session_counts
Out[3]:
A    27244
M    24538
N    20971
U        1
Name: session, dtype: int64
In [4]:
# Derive a calendar-year column from the parsed 'date' column.
df = df.assign(year=df['date'].dt.year)
In [5]:
# Rows per year, flattened to a two-column frame: 'year' and 'count'.
# Naming the index and the values explicitly avoids relying on whatever
# default column labels value_counts()/reset_index() would produce.
df_years = (
    df['year']
    .value_counts()
    .rename_axis('year')
    .reset_index(name='count')
)
In [6]:
# Pages per year: one bar per year, bar height is the number of rows in the
# page list for that year. The explicit chart and axis titles let the figure
# stand on its own when the notebook is skimmed.
alt.Chart(df_years).mark_bar().encode(
    x=alt.X('year:N', title='Year'),
    y=alt.Y('count:Q', title='Number of pages')
).properties(
    title='Pages per year'
)
Out[6]:
In [7]:
# One row per distinct date found in the page list (single column labelled 0),
# plus the calendar year of each of those dates.
df_dates = pd.DataFrame(df['date'].unique())
df_dates = df_dates.assign(year=df_dates[0].dt.year)

# Number of distinct dates per year, as a 'year' / 'count' frame.
df_years_dates = (
    df_dates['year']
    .value_counts()
    .rename_axis('year')
    .reset_index(name='count')
)
In [8]:
# Days per year: one bar per year, bar height is the number of distinct dates
# in that year. Chart and axis titles distinguish this figure from the
# pages-per-year chart, which has the same shape.
alt.Chart(df_years_dates).mark_bar().encode(
    x=alt.X('year:N', title='Year'),
    y=alt.Y('count:Q', title='Number of days')
).properties(
    title='Days per year'
)
Out[8]:
In [9]:
# Rank assigned to each session code; stored in the 'order' column below.
orders = {'M': 1, 'N': 2, 'A': 3, 'U': 4}

# Count rows for every (year, session) pair and flatten the result into
# 'year' / 'session' / 'count' columns.
df_sessions = (
    df.groupby(by='year')['session']
    .value_counts()
    .rename('count')
    .reset_index()
)

# Direct dict lookup: a session code missing from `orders` raises KeyError.
df_sessions['order'] = [orders[code] for code in df_sessions['session']]
In [10]:
# Pages per year, split by session: stacked bars with one coloured segment
# per session code. (The counts come from rows of the page list, not days.)
alt.Chart(df_sessions).mark_bar().encode(
    x=alt.X('year:N', title='Year'),
    y=alt.Y('count:Q', title='Number of pages'),
    color='session:N',
    order=alt.Order(
      # Sort the segments of the bars by this field
      'order',
      sort='ascending'
    )
).properties(
    title='Pages per year by session'
)
Out[10]:

Created by Tim Sherratt for the GLAM Workbench.