#!/usr/bin/env python
# coding: utf-8

# # stats on `kubectl top pod`
#
# Need to reimplement part of `kubectl top pod` because the Python client
# doesn't support the metrics endpoint.
#
# Exploring current resource metrics of mybinder.org,
# and drawing some resource planning conclusions based on the results.

# Set kubernetes context and namespace

# In[1]:


context = 'prod'
namespace = 'prod'
get_ipython().run_line_magic('matplotlib', 'inline')


# Connect to kubernetes

# In[2]:


import kubernetes.config
import kubernetes.client

kubernetes.config.load_kube_config(context=context)
kube = kubernetes.client.CoreV1Api()


# Get the current pods as a dict:

# In[3]:


pods = kube.list_namespaced_pod(
    namespace,
    label_selector="app=jupyterhub,component=singleuser-server",
).items
# make it a dict by name
pods = {pod.metadata.name: pod for pod in pods}


# Make the API request to get pod metrics

# In[4]:


import json

api_client = kubernetes.client.ApiClient()
# kube client doesn't support metrics yet
# from https://github.com/kubernetes-client/python/issues/528
resp, status, headers = api_client.call_api(
    f'/apis/metrics.k8s.io/v1beta1/namespaces/{namespace}/pods',
    'GET',
    auth_settings=['BearerToken'],
    response_type='json',
    _preload_content=False,
)
metrics = json.loads(resp.read().decode('utf8'))


# Parse the metrics into a pandas data frame

# In[5]:


import re

import numpy as np
import pandas as pd


def parse_memory(mem_s):
    """returns memory in MB

    note: approximates binary suffixes (Ki/Mi/Gi) as decimal (kB/MB/GB);
    the few percent of error doesn't matter for this analysis.
    """
    num_s, suffix = re.match(r'(\d+)([^\d]+)', mem_s).groups()
    num = int(num_s)
    if suffix == 'Ki':
        num = int(num * 1e-3)
    elif suffix == 'Mi':
        pass
    elif suffix == 'Gi':
        num = int(num * 1e3)
    else:
        raise ValueError("unsupported memory suffix: %r" % suffix)
    return num


def parse_cpu(cpu_s):
    """Returns CPU as a float (cores); 'm' is the millicore suffix"""
    num_s, suffix = re.match(r'(\d+)([^\d]*)', cpu_s).groups()
    num = int(num_s)
    if suffix == 'm':
        num = num * 1e-3
    elif suffix:
        raise ValueError("unsupported cpu suffix: %r" % suffix)
    return num


data = []
for metric in metrics['items']:
    pod_name = metric['metadata']['name']
    if pod_name not in pods:
        continue
    pod = pods[pod_name]
    # total usage across all containers in the pod
    mem = 0
    cpu = 0
    for container in metric['containers']:
        mem += parse_memory(container['usage']['memory'])
        cpu += parse_cpu(container['usage']['cpu'])
    # recover the image name from the pod name ('-2d' is an escaped '-')
    image = pod_name.split('-', 1)[1].rsplit('-', 1)[0].replace('-2d', '-')
    data.append([pod_name, image, mem, cpu])

df = pd.DataFrame(data, columns=['pod', 'image', 'mem', 'cpu'])
df = df.sort_values('mem')
df


# ## Memory
#
# Plot the pod memory distribution, both as a histogram and as a cumulative
# distribution function, so that we can see where most pods are,
# and what fraction of pods would fit under a given limit.

# In[6]:


nbins = 100
ax = df.mem.hist(bins=nbins)
counts = pd.cut(df.mem, nbins).value_counts().sort_index()
left = counts.index.categories.left
# grab the next color from the (private) matplotlib property cycle,
# so the CDF line gets a color distinct from the histogram
color = next(ax._get_lines.prop_cycler)['color']
ax_right = ax.twinx()
ax.set_xlabel("pod memory (MB)")
ax.set_ylabel("pods")
ax.set_xlim(0, None)
ax.set_title("Memory distribution")
ax_right.set_ylabel("cumulative pod fraction")
ax_right.plot(left, np.cumsum(counts) / len(df), c=color, label="cumulative")
ax_right.grid(False)


# In[7]:


for threshold in [1000, 800, 512, 256]:
    print(f"users over {threshold} MB: {(df['mem'] > threshold).sum()}")
print(f"users under 100MB: {(df['mem'] <= 100).sum()}")
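# The same thresholds can also be read as fractions of all pods, straight from
# the empirical CDF (a quick sketch; it uses only the `df` built above):

# In[ ]:


# fraction of pods that would fit under each candidate memory limit (sketch)
for threshold in [1000, 800, 512, 256, 100]:
    fit_fraction = (df['mem'] <= threshold).mean()
    print(f"{100 * fit_fraction:.1f}% of pods fit under {threshold} MB")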
# Same data again as quantiles. This shows directly what limit would be
# required to fit a given fraction of pods.

# In[8]:


from IPython.display import display

mem_quant = df.mem.quantile([0.5, 0.75, 0.9, 0.95, 0.98, 0.99, 1])
display(mem_quant)
mem_quant.plot(kind='bar')
print(f"90% of pods use less than {int(mem_quant[0.9])}MB")
print(f"Only 5% of pods would be affected if the limit were {int(mem_quant[0.95])}MB")


# ## CPU
#
# Similar plots again for CPU show that ~all pods are idle.

# In[9]:


ax = df.cpu.hist(bins=50)
ax.set_title("CPU")
ax.set_xlabel("pod CPU load")
ax.set_ylabel("# pods")
ax.set_xlim(0, None);


# In[10]:


cpu_quant = df.cpu.quantile([0.5, 0.75, 0.9, 0.95, 0.98, 0.99, 1])
display(cpu_quant)
print(f"only {(df.cpu > 0.1).sum()}/{len(df)} pods are using more than 10% CPU")
cpu_quant.plot(kind='bar')


# ## Resource planning
#
# Now we can try to pick a node flavor based on this information.
#
# We want to fit about 100 users on a node, since Kubernetes enforces a limit
# of 110 pods per node, and we have ~150-400 users at a time on mybinder.org.

# In[11]:


# goal, close to the kubernetes limit of 110 pods per node
users_per_node = 100


# We can start with a simple average-based estimate:

# In[12]:


mem_per_node = df.mem.mean() * users_per_node
cpu_per_node = df.cpu.mean() * users_per_node
print(f"   cpu: {cpu_per_node:.3f} cores")
print(f"memory: {mem_per_node / 1024:.3f}GB")


# While this works on average, outliers can cause problems if we plan too
# close to it.
#
# Instead, we can try planning based on percentiles, e.g. if we assume that
# the top 10% of outliers don't use more resources than the bottom 90%, we can
# plan based on the 90th percentile as 'average' or 'typical'.
#
# Here are some resource plans based on different percentiles of current usage:

# In[13]:


from IPython.display import display, Markdown

# close to the kubernetes limit of 110 pods per node
users_per_node = 100

for quantile in (0.9, 0.95, 0.98, 0.99):
    mem_per_user = mem_quant[quantile]
    cpu_per_user = cpu_quant[quantile]
    md = []
    md.append(f"## {int(100 * quantile)}th percentile")
    md.append(f"Assuming typical users stay below the {int(100 * quantile)}th percentile, we need:")
    md.append("")
    md.append(f"- Memory: **{mem_per_user:.0f}MB** per pod")
    md.append(f"- CPU: **{cpu_per_user:.3f} cores** per pod")
    md.append("")
    md.append(f"For {users_per_node} users, that means each node must have:")
    md.append("")
    md.append(f"- Memory: **{mem_per_user * users_per_node / 1024:.1f}GB** per node")
    md.append(f"- CPU: **{cpu_per_user * users_per_node:.1f} cores** per node")
    display(Markdown('\n'.join(md)))


# Or we can reverse it: pick a node flavor and see which percentile of each
# resource would need to be the 'average'.

# In[14]:


# memory (MB) per CPU for GCE n1 node flavors
ratios = {
    'standard': 3.75 * 1024,
    'highmem': 6.5 * 1024,
}
node_memory = {}
for cpu in 4, 8, 16:
    for flavor, ratio in ratios.items():
        mem = cpu * ratio
        node_memory[f'n1-{flavor}-{cpu}'] = mem
        cpu_quantile = (df.cpu <= cpu / users_per_node).sum() / len(df)
        mem_quantile = (df.mem <= mem / users_per_node).sum() / len(df)
        print(f"n1-{flavor}-{cpu}: {mem/1024:.0f}GB")
        print(f"  cpu: {100*cpu_quantile:.0f}%")
        print(f"  mem: {100*mem_quantile:.0f}%")


# So if we picked `n1-highmem-8`, we would be happy as long as the *average*
# user on a given node uses fewer resources than the current 98th percentile.
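# To make that comparison easier to scan, here is the same table as per-user
# budgets (a quick sketch; it only rearranges numbers already computed above,
# assuming `users_per_node` pods per node):

# In[ ]:


# per-user resource budget implied by each node flavor (sketch)
for flavor, mem in sorted(node_memory.items()):
    n_cpus = int(flavor.rsplit('-', 1)[1])  # flavor names end in the CPU count
    print(f"{flavor}: {mem / users_per_node:.0f}MB, "
          f"{n_cpus / users_per_node:.2f} cores per user")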
# ### Safety factor
#
# The next 'safety factor' test: how many users hitting the *limit* can we
# handle, assuming the rest of the users are 'typical'?
# Oversubscribing CPU isn't usually a big deal, but oversubscribing memory can
# be catastrophic, so we want to make sure it's pretty unlikely that a few
# users nearing the memory limit can consume all of a machine's memory.
#
# Let's say we use the 90th percentile as "typical": how many users hitting a
# given *limit* would it take to run out of memory, for each node flavor?

# In[15]:


import operator

import matplotlib.pyplot as plt

quantile = 0.9
mem_typical = mem_quant[quantile]
mem_limit = 2048
# total memory needed if n_hogs pods sit at the limit
# and the rest use the 'typical' amount
n_hogs = np.arange(0, 21)
total_memory = n_hogs * mem_limit + (users_per_node - n_hogs) * mem_typical
plt.plot(n_hogs, total_memory / 1024, label="memory needed")
plt.xlim(0, n_hogs[-1])
# horizontal line for each node flavor's total memory
for flavor, mem in sorted(
        node_memory.items(),
        key=operator.itemgetter(1),
        reverse=True):
    plt.plot([0, n_hogs[-1]], [mem / 1024] * 2, label=flavor)
plt.legend(loc=0)
plt.title(f"Memory needed, based on {int(100 * quantile)}th percentile and {mem_limit/1024:.0f}G limit")
plt.xlabel("Number of outliers")
plt.ylabel("GB")
plt.ylim(0, 100)


# From this chart, we can see that `n1-highmem-8` can withstand 18 pods sitting
# right at the limit of 2GB, as long as the remaining pods average below the
# 90th percentile.
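# We can cross-check the chart by solving for the break-even point directly
# (a quick sketch, using only the quantities above: a node overflows once
# `n * mem_limit + (users_per_node - n) * mem_typical` exceeds its memory):

# In[ ]:


# largest number of at-the-limit pods each flavor can absorb (sketch)
for flavor, mem in sorted(node_memory.items(), key=operator.itemgetter(1)):
    n_max = int((mem - users_per_node * mem_typical) // (mem_limit - mem_typical))
    print(f"{flavor}: up to {max(n_max, 0)} pods at the {mem_limit / 1024:.0f}GB limit")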