from hubtraf.analysis.dataframe import logfile_to_df
# Parse the 'chp-20.jsonl' log file into a DataFrame.
df = logfile_to_df('chp-20.jsonl')
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# Discard rows without a duration, then draw one histogram of duration per action.
df.dropna(subset=['duration']).hist(by='action', column='duration')
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x119350198>, <matplotlib.axes._subplots.AxesSubplot object at 0x11bf21630>], [<matplotlib.axes._subplots.AxesSubplot object at 0x11bf4bba8>, <matplotlib.axes._subplots.AxesSubplot object at 0x11bf7d160>], [<matplotlib.axes._subplots.AxesSubplot object at 0x11bfa56d8>, <matplotlib.axes._subplots.AxesSubplot object at 0x11bfccc50>]], dtype=object)
Load data from the runs with 20, 40, and 60 users for both proxies (chp and traefik)
import pandas as pd
def load_data(nlist=(20, 40, 60), proxies=('chp', 'traefik')):
    """Load the logs of every (user count, proxy) run into one DataFrame.

    For each ``n`` in ``nlist`` and each ``proxy`` in ``proxies`` the file
    ``"{proxy}-{n}.jsonl"`` is parsed with ``logfile_to_df``. The
    ``attempt``, ``iteration`` and ``extra`` columns are removed when
    present, and every row is labelled with three new columns:
    ``proxy``, ``n`` and ``run`` (``"{proxy}-{n}"``).

    Args:
        nlist: iterable of user counts, one log file per count and proxy.
        proxies: iterable of proxy names used as the file name prefix.

    Returns:
        The per-run frames concatenated row-wise in load order
        (``pd.concat(..., sort=False)``); the original indexes are kept.
    """
    frames = []
    for n in nlist:
        for proxy in proxies:
            run = f"{proxy}-{n}"
            frame = logfile_to_df(f"{run}.jsonl")
            # Not every log carries all of these bookkeeping columns, so
            # tolerate missing ones instead of checking each by hand.
            frame = frame.drop(
                columns=['attempt', 'iteration', 'extra'],
                errors='ignore',
            )
            frames.append(frame.assign(proxy=proxy, n=n, run=run))
    return pd.concat(frames, sort=False)
# Rebind df to the combined frame returned by load_data() with its default arguments.
df = load_data()
# Keep only the rows that have a duration value; these feed the timing plots.
source = df.dropna(subset=['duration'])
Plot timings for each run, so we can compare the proxies
import altair as alt
def bars(source, x, y='duration'):
    """Return a layered box-and-whisker style chart of ``y`` per ``x``.

    Four layers are stacked over the same data: a rule from min to the
    first quartile, a bar from the first to the third quartile, a rule
    from max to the third quartile, and a white tick at the median.
    The y axis is titled ``y`` and uses a log scale.

    Args:
        source: data passed to ``alt.Chart``.
        x: encoding for the x channel (used unchanged in every layer).
        y: name of the field whose aggregates are plotted.

    Returns:
        The layered Altair chart.
    """
    def agg(op):
        # Altair shorthand for a quantitative aggregate of the y field.
        return f'{op}({y}):Q'

    width = 8
    chart = alt.Chart(source)

    # The first layer carries the shared axis title and the log scale.
    whisker_low = chart.mark_rule().encode(
        x=x,
        y=alt.Y(agg('min'), title=y, scale=alt.Scale(type='log')),
        y2=agg('q1'),
    )
    box = chart.mark_bar(size=width).encode(
        x=x, y=agg('q1'), y2=agg('q3'),
    )
    whisker_high = chart.mark_rule().encode(
        x=x, y=agg('max'), y2=agg('q3'),
    )
    median_mark = chart.mark_tick(color='white', size=width).encode(
        x=x, y=agg('median'),
    )

    # Layer order matters: the median tick is drawn last, on top of the box.
    return whisker_low + box + whisker_high + median_mark
from IPython.display import display
# Show one chart per distinct action, with the runs along the x axis.
for action in source['action'].unique():
    rows_for_action = source.loc[source['action'] == action]
    chart = bars(rows_for_action, x='run')
    display(chart.properties(title=action))
/Users/benjaminrk/conda/lib/python3.7/site-packages/altair/utils/core.py:294: FutureWarning: A future version of pandas will default to `skipna=True`. To silence this warning, pass `skipna=True|False` explicitly. attrs['type'] = infer_vegalite_type(data[attrs['field']])
Finally, check for failures:
# List the distinct values that occur in the phase column.
df.phase.unique()
array(['complete', 'start', 'attempt-start', 'attempt-complete'], dtype=object)
# Select the rows whose phase is 'failed'; an empty result means none were logged.
df[df.phase == 'failed']
action | duration | event | phase | username | proxy | n | run | |
---|---|---|---|---|---|---|---|---|
timestamp |
No failures!