#!/usr/bin/env python
# coding: utf-8

# In[1]:


from hubtraf.analysis.dataframe import logfile_to_df
# First look at a single log file ('chp-20.jsonl').
# This frame gets its own name because `df` is bound to the combined,
# multi-run frame later in the notebook; sharing the name would make this
# cell plot different data when re-run after that later cell.
chp20_df = logfile_to_df('chp-20.jsonl')


# In[2]:


get_ipython().run_line_magic('matplotlib', 'inline')


# In[3]:


# One histogram of `duration` per `action`; rows without a duration are dropped first.
chp20_df.dropna(subset=['duration']).hist(by='action', column='duration')


# Load data from the runs with 20, 40, and 60 users, for each of the two proxies (chp and traefik)

# In[4]:


import pandas as pd

def load_data(nlist=(20, 40, 60), proxies=('chp', 'traefik')):
    """Load the per-run log files and stack them into one labelled DataFrame.

    For every user count in ``nlist`` and every proxy in ``proxies`` the file
    ``"{proxy}-{n}.jsonl"`` (resolved relative to the working directory) is
    parsed with ``logfile_to_df``.

    Parameters
    ----------
    nlist : iterable of int
        User counts of the runs to load.
    proxies : iterable of str
        Proxy names used as file-name prefixes. Defaults to the two proxies
        this notebook compares.

    Returns
    -------
    pandas.DataFrame
        All runs concatenated in load order (each file's own index is kept),
        without the ``attempt``, ``iteration`` and ``extra`` columns, and with
        three added columns identifying the source run: ``proxy``, ``n`` and
        ``run`` (``"{proxy}-{n}"``).

    Raises
    ------
    KeyError
        If a log file has no ``attempt`` or ``iteration`` column.
    ValueError
        If ``nlist`` or ``proxies`` is empty, because ``pd.concat`` is then
        given nothing to concatenate.
    """
    # Materialise once so a one-shot iterator is not exhausted after the first n.
    proxies = tuple(proxies)

    frames = []
    for n in nlist:
        for proxy in proxies:
            run_label = f"{proxy}-{n}"
            run_df = logfile_to_df(f"{run_label}.jsonl")
            run_df = (
                run_df
                .drop(columns=['attempt', 'iteration'])
                # Not every log has an 'extra' column, so its absence is tolerated.
                .drop(columns=['extra'], errors='ignore')
                .assign(proxy=proxy, n=n, run=run_label)
            )
            frames.append(run_df)
    return pd.concat(frames, sort=False)

# Combine every run into one frame, then keep only the rows that carry a timing.
df = load_data()
source = df.loc[df['duration'].notna()]


# Plot timings for each run, so we can compare the proxies

# In[5]:


import altair as alt

def bars(source, x, y='duration', bar_width=8, y_scale='log'):
    """Build a layered min/max box plot of ``y`` for each value of ``x``.

    Four layers are drawn on one ``alt.Chart(source)`` and added together:
    a rule from the minimum to the first quartile, a bar from the first to
    the third quartile, a rule from the maximum to the third quartile, and a
    white tick at the median.

    Parameters
    ----------
    source
        Data handed to ``alt.Chart``.
    x
        Encoding for the x channel (passed through unchanged to every layer).
    y : str
        Name of the field that is aggregated on the y channel; also used as
        the y-axis title.
    bar_width : int
        Size given to both the quartile bar and the median tick.
    y_scale : str
        Scale type for the y axis, passed to ``alt.Scale(type=...)``.

    Returns
    -------
    The sum of the four layers, in the order listed above.
    """
    def agg(op):
        # Shorthand for "<aggregate>(<field>)" typed as quantitative.
        return f'{op}({y}):Q'

    base = alt.Chart(source)

    # The axis title and scale are declared on this first layer only.
    lower_whisker = base.mark_rule().encode(
        y=alt.Y(agg('min'), title=y, scale=alt.Scale(type=y_scale)),
        y2=agg('q1'),
        x=x,
    )

    box = base.mark_bar(size=bar_width).encode(
        y=agg('q1'),
        y2=agg('q3'),
        x=x,
    )

    upper_whisker = base.mark_rule().encode(
        y=agg('max'),
        y2=agg('q3'),
        x=x,
    )

    # White so the median stands out against the bar; same width as the bar.
    median_tick = base.mark_tick(color='white', size=bar_width).encode(
        y=agg('median'),
        x=x,
    )

    return lower_whisker + box + upper_whisker + median_tick


# In[6]:


from IPython.display import display

# One chart per distinct action, titled with the action and comparing the runs on the x axis.
for action in source['action'].unique():
    is_this_action = source['action'] == action
    chart = bars(source.loc[is_this_action], x='run')
    display(chart.properties(title=action))


# Finally, check for failures:

# In[7]:


# Every distinct phase value present in the combined frame.
df['phase'].unique()


# In[8]:


# Rows whose phase is 'failed'; an empty result means nothing failed.
df.loc[df['phase'] == 'failed']


# No failures!