In [1]:

from hubtraf.analysis.dataframe import logfile_to_df
# Start with a single run: the log of the chp proxy at 20 users.
single_run_log = 'chp-20.jsonl'
df = logfile_to_df(single_run_log)

In [2]:

%matplotlib inline

In [3]:

# Histogram of `duration`, grouped by `action`; rows that carry no
# duration are removed before plotting.
with_duration = df.dropna(subset=['duration'])
with_duration.hist(column='duration', by='action')

Out[3]:

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x119350198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11bf21630>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x11bf4bba8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11bf7d160>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x11bfa56d8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11bfccc50>]],
      dtype=object)

Load data from the runs of both proxies (chp and traefik) with 20, 40, and 60 users.

In [4]:

import pandas as pd

def load_data(nlist=(20, 40, 60), proxies=('chp', 'traefik')):
    """Load the log of every (user count, proxy) run into one DataFrame.

    For each ``n`` in ``nlist`` and each name in ``proxies``, the file
    ``"{proxy}-{n}.jsonl"`` is read with ``logfile_to_df``. The ``attempt``,
    ``iteration`` and ``extra`` columns are dropped when present, and three
    columns identifying the run are added: ``proxy``, ``n`` and ``run``
    (the string ``"{proxy}-{n}"``).

    Parameters
    ----------
    nlist : iterable
        User counts of the runs to load; used in the file names.
    proxies : iterable of str
        Proxy names used as file-name prefixes.

    Returns
    -------
    pandas.DataFrame
        The per-run frames concatenated in load order (``n`` outer loop,
        proxy inner loop).

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when no run is requested, i.e. ``nlist`` or
        ``proxies`` is empty.
    """
    # Drop whichever of these columns a log happens to have; a log that
    # lacks one of them is not an error.
    unwanted = ['attempt', 'iteration', 'extra']

    frames = []
    for n in nlist:
        for proxy in proxies:
            run = f"{proxy}-{n}"
            frame = (
                logfile_to_df(f"{run}.jsonl")
                .drop(columns=unwanted, errors='ignore')
                .assign(proxy=proxy, n=n, run=run)
            )
            frames.append(frame)
    return pd.concat(frames, sort=False)

# `df` holds every log record from all runs; `source` keeps only the rows
# that have a duration value, which is what the plots below consume.
df = load_data()
has_duration = df['duration'].notna()
source = df[has_duration]

Plot timings for each run, so we can compare the proxies

In [5]:

import altair as alt

def bars(source, x, y='duration'):
    """Build a layered box-and-whisker chart of ``y`` for each value of ``x``.

    Four layers are created from the same ``alt.Chart(source)`` and added
    together in this order:

    1. a rule from ``min(y)`` to ``q1(y)`` (lower whisker),
    2. a bar from ``q1(y)`` to ``q3(y)`` (the box),
    3. a rule from ``max(y)`` to ``q3(y)`` (upper whisker),
    4. a white tick at ``median(y)``.

    The y-axis title (``y``) and the log scale are declared on the lower
    whisker's y encoding only.

    Parameters
    ----------
    source
        Data passed straight to ``alt.Chart``.
    x
        Encoding for the x channel, passed unchanged to every layer.
    y : str
        Name of the field that is aggregated on the y channel.

    Returns
    -------
    The sum of the four layers, as produced by ``+`` on the chart objects.
    """
    # Width shared by the box and the median tick so they line up.
    box_size = 8

    # Aggregate shorthands, all typed as quantitative (":Q").
    y_min = f'min({y}):Q'
    y_q1 = f'q1({y}):Q'
    y_median = f'median({y}):Q'
    y_q3 = f'q3({y}):Q'
    y_max = f'max({y}):Q'

    chart = alt.Chart(source)

    # Lower whisker; also carries the axis title and the log scale.
    log_axis = alt.Y(y_min, title=y, scale=alt.Scale(type='log'))
    whisker_low = chart.mark_rule().encode(x=x, y=log_axis, y2=y_q1)

    # Inter-quartile box.
    box = chart.mark_bar(size=box_size).encode(x=x, y=y_q1, y2=y_q3)

    # Upper whisker.
    whisker_high = chart.mark_rule().encode(x=x, y=y_max, y2=y_q3)

    # Median marker, drawn in white across the box.
    median_tick = chart.mark_tick(color='white', size=box_size).encode(x=x, y=y_median)

    return whisker_low + box + whisker_high + median_tick

In [6]:

from IPython.display import display

# One chart per distinct action, with the runs along the x axis so the
# proxies can be compared for that action.
for action in source['action'].unique():
    rows_for_action = source.loc[source['action'] == action]
    chart = bars(rows_for_action, x='run')
    titled = chart.properties(title=action)
    display(titled)

/Users/benjaminrk/conda/lib/python3.7/site-packages/altair/utils/core.py:294: FutureWarning: A future version of pandas will default to `skipna=True`. To silence this warning, pass `skipna=True|False` explicitly.
  attrs['type'] = infer_vegalite_type(data[attrs['field']])

Finally, check for failures:

In [7]:

# Distinct values of the `phase` column across all loaded runs.
df['phase'].unique()

Out[7]:

array(['complete', 'start', 'attempt-start', 'attempt-complete'],
      dtype=object)

In [8]:

# Rows whose phase is exactly 'failed'; an empty frame means none were logged.
# NOTE(review): other failure phases (for example one with an 'attempt-'
# prefix, if the logger emits such a value) would not match here - confirm
# against the phase list above.
df.loc[df['phase'] == 'failed']

Out[8]:

	action	duration	event	phase	username	proxy	n	run
timestamp

No failures!