#!/usr/bin/env python
# coding: utf-8

# # Measuring the performance cost of _preload_content
# 
# Collected results for measuring `list_namespaced_pod` with and without `_preload_content=False`,
# run for several versions of the kubernetes Python client.
# 
# This cost seems to have risen for large numbers of resources in recent versions of kubernetes, particularly 12.0.
# 
# To produce this data, a few queries are run against mybinder.org-prod, which has ~75 placeholder pods and ~600 user pods.
# See `measure-preload-content` for data generation.

# In[1]:


import altair as alt
import pandas as pd


# In[2]:


# Load the collected measurements.
# kube_version is read as text: the version sort later in this file calls
# .split(".") on each value, which would fail if pandas inferred a numeric
# dtype (e.g. a file containing only two-component versions such as "12.0").
df = pd.read_csv("measurements.csv", dtype={"kube_version": str})
# Bare expression: shows the frame when run as a notebook cell.
df


# In[3]:


# Order versions numerically, component by component, so that e.g. "9.0.1"
# sorts before "10.0.1" (a plain string sort would put it after).
versions = sorted(
    df.kube_version.unique(),
    key=lambda vs: [int(part) for part in vs.split(".")],
)

# Build one chart per measured function (`fname`): a box plot of per-call
# time for each combination of kube_version and preload=true|false.
charts = []
for fname, group_df in df.groupby("fname"):
    # Summarise the pod count for the title; prefix "~" when the count
    # varied between measurements, since the mean is then only approximate.
    n_pods = int(group_df.pods.mean())
    approx_marker = "~" if len(group_df.pods.unique()) > 1 else ""
    plural = "s" if n_pods != 1 else ""
    title = f"{fname} ({approx_marker}{n_pods} pod{plural})"

    # When every measurement is under 1, pin the y axis to [0, 1];
    # otherwise let Altair pick the domain from the data.
    if group_df.per_call.max() < 1:
        scale = alt.Scale(domain=[0, 1])
    else:
        scale = alt.Scale()

    charts.append(
        alt.Chart(group_df, title=title)
        .mark_boxplot()
        .encode(
            color="preload",
            x=alt.X("kube_version:O", sort=versions),
            y=alt.Y("per_call", scale=scale, title="seconds per call"),
        )
    )

# Lay the per-function charts out side by side; this bare expression shows
# the result when run as a notebook cell.
# Optional: append .resolve_scale(y='shared')
alt.hconcat(*charts)