Based on data from https://www.e-control.at/ for district Mödling.
import pandas as pd
import altair as alt
URL = 'https://raw.githubusercontent.com/Datenspieler/notebooks_for_blog/master/2020-treibstoff/treibstoff.csv'
df = pd.read_csv(URL)
df.date = pd.to_datetime(df.date)
df.tail()
df.info()
print(df.date.min())
print(df.date.max())
df['min'] = df.groupby(['type', 'date'])[['price']].transform('min')
df['mean'] = df.groupby(['type', 'date'])[['price']].transform('mean')
Since for this analysis only interested in minimum/mean price, not what gas station is offering that price, can drop a lot of data.
df = df[['date', 'type', 'min', 'mean']].drop_duplicates()
df.head(3)
chart_zero = alt.Chart(df).mark_line().encode(
x='date',
y=alt.Y(alt.repeat("column"), type='quantitative'),
color='type',
strokeDash='type',
).repeat(
column=['min', 'mean']
)
chart_zero
Some movement, escepcially if we look at the mean price. We can make it look more spectacular, if we don't let the price start at 0 - great trick and bad visualization practive at the same time 😉
chart_not_zero = alt.Chart(df).mark_line().encode(
x='date',
y=alt.Y(alt.repeat("column"), type='quantitative', scale=alt.Scale(zero=False)),
color='type',
strokeDash='type',
).repeat(
column=['min', 'mean']
)
chart_not_zero
To 'correct' for long term trend, let's take the 1 week (24 x 7 measurements) average.
df1 = df.set_index(['type', 'date']).unstack('type')
df1.columns = ['_'.join(col[::-1]).strip() for col in df1.columns.values]
df1.head()
df_delta = df1 - df1.rolling(7*24, center = True).mean()
df_delta = df_delta * 100 #Let's switch to Cent values
df_delta.head() # of course the first entries are NaN
df_delta.tail()
df_delta.describe()
df_delta = df_delta.reset_index()
df_chart = df_delta.groupby([df_delta.date.dt.day_name(), df_delta.date.dt.hour]).mean()#.stack()
df_chart.index.names = ['Day of week', 'Hour']
df_chart
df_chart = df_chart.reset_index()
df_chart.describe()
df_chart.select_dtypes('number').max() - df_chart.select_dtypes('number').min()
for (col, title_part) in [('DIE_min','diesel, cheapest gas station') ,
('DIE_mean', 'diesel, mean of 5 cheapest gas stations'),
('SUP_min','premium gas, cheapest gas station') ,
('SUP_mean', 'premium gas, mean of 5 cheapest gas stations')]:
chart = alt.Chart(df_chart).mark_rect().encode(
x='Hour:O',
y=alt.Y('Day of week:O', sort=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']),
color=alt.Color(col, type='quantitative', title='Cent',
scale=alt.Scale(scheme='redyellowgreen', domainMid = 0, domain = [-2, 2], reverse=True))
).properties(
title='Difference in gasoline price - ' + title_part
)
chart.save(col + '.svg')
display(chart)