#!/usr/bin/env python
# coding: utf-8

# # stats on `kubectl top pod`
#
# Need to reimplement part of `kubectl top pod` because the Python client
# doesn't support the metrics endpoint.
#
# Exploring current resource metrics of mybinder.org,
# and drawing some resource planning conclusions based on the results.

# Set kubernetes context and namespace

# In[1]:


context = 'prod'
namespace = 'prod'
get_ipython().run_line_magic('matplotlib', 'inline')


# Connect to kubernetes

# In[2]:


import kubernetes.config
import kubernetes.client

kubernetes.config.load_kube_config(context=context)
kube = kubernetes.client.CoreV1Api()


# Get the current pods as a dict:

# In[3]:


pods = kube.list_namespaced_pod(
    namespace,
    label_selector="app=jupyterhub,component=singleuser-server",
).items
# make it a dict by name
pods = {pod.metadata.name: pod for pod in pods}


# Make the API request to get pod metrics

# In[4]:


import json

api_client = kubernetes.client.ApiClient()
# kube client doesn't support metrics yet
# from https://github.com/kubernetes-client/python/issues/528
resp, status, headers = api_client.call_api(
    f'/apis/metrics.k8s.io/v1beta1/namespaces/{namespace}/pods',
    'GET',
    auth_settings=['BearerToken'],
    response_type='json',
    _preload_content=False,
)
metrics = json.loads(resp.read().decode('utf8'))


# Parse the metrics into a pandas data frame

# In[5]:


import re

import numpy as np
import pandas as pd


def parse_memory(mem_s):
    """returns memory in MB

    note: approximates binary suffixes (Ki/Mi/Gi) as decimal (kB/MB/GB);
    the few percent of error doesn't matter for this analysis.
    """
    num_s, suffix = re.match(r'(\d+)([^\d]+)', mem_s).groups()
    num = int(num_s)
    if suffix == 'Ki':
        num = int(num * 1e-3)
    elif suffix == 'Mi':
        pass
    elif suffix == 'Gi':
        num = int(num * 1e3)
    else:
        raise ValueError("unsupported memory suffix: %r" % suffix)
    return num


def parse_cpu(cpu_s):
    """Returns CPU as a float (cores); 'm' is the millicore suffix"""
    num_s, suffix = re.match(r'(\d+)([^\d]*)', cpu_s).groups()
    num = int(num_s)
    if suffix == 'm':
        num = num * 1e-3
    elif suffix:
        raise ValueError("unsupported cpu suffix: %r" % suffix)
    return num


data = []
for metric in metrics['items']:
    pod_name = metric['metadata']['name']
    if pod_name not in pods:
        continue
    pod = pods[pod_name]
    # total usage across all containers in the pod
    mem = 0
    cpu = 0
    for container in metric['containers']:
        mem += parse_memory(container['usage']['memory'])
        cpu += parse_cpu(container['usage']['cpu'])
    # recover the image name from the pod name ('-2d' is an escaped '-')
    image = pod_name.split('-', 1)[1].rsplit('-', 1)[0].replace('-2d', '-')
    data.append([pod_name, image, mem, cpu])

df = pd.DataFrame(data, columns=['pod', 'image', 'mem', 'cpu'])
df = df.sort_values('mem')
df


# ## Memory
#
# Plot the pod memory distribution, both as a histogram and as a cumulative
# distribution function, so that we can see where most pods are,
# and what fraction of pods would fit under a given limit.

# In[6]:


nbins = 100
ax = df.mem.hist(bins=nbins)
counts = pd.cut(df.mem, nbins).value_counts().sort_index()
left = counts.index.categories.left
# grab the next color from the (private) matplotlib property cycle,
# so the CDF line gets a color distinct from the histogram
color = next(ax._get_lines.prop_cycler)['color']
ax_right = ax.twinx()
ax.set_xlabel("pod memory (MB)")
ax.set_ylabel("pods")
ax.set_xlim(0, None)
ax.set_title("Memory distribution")
ax_right.set_ylabel("cumulative pod fraction")
ax_right.plot(left, np.cumsum(counts) / len(df), c=color, label="cumulative")
ax_right.grid(False)


# In[7]:


for threshold in [1000, 800, 512, 256]:
    print(f"users over {threshold} MB: {(df['mem'] > threshold).sum()}")
print(f"users under 100MB: {(df['mem'] <= 100).sum()}")
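# The same thresholds can also be read as fractions of all pods, straight from
# the empirical CDF (a quick sketch; it uses only the `df` built above):

# In[ ]:


# fraction of pods that would fit under each candidate memory limit (sketch)
for threshold in [1000, 800, 512, 256, 100]:
    fit_fraction = (df['mem'] <= threshold).mean()
    print(f"{100 * fit_fraction:.1f}% of pods fit under {threshold} MB")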
# Same data again as quantiles. This shows directly what limit would be
# required to fit a given fraction of pods.

# In[8]:


from IPython.display import display

mem_quant = df.mem.quantile([0.5, 0.75, 0.9, 0.95, 0.98, 0.99, 1])
display(mem_quant)
mem_quant.plot(kind='bar')
print(f"90% of pods use less than {int(mem_quant[0.9])}MB")
print(f"Only 5% of pods would be affected if the limit were {int(mem_quant[0.95])}MB")


# ## CPU
#
# Similar plots again for CPU show that ~all pods are idle.

# In[9]:


ax = df.cpu.hist(bins=50)
ax.set_title("CPU")
ax.set_xlabel("pod CPU load")
ax.set_ylabel("# pods")
ax.set_xlim(0, None);


# In[10]:


cpu_quant = df.cpu.quantile([0.5, 0.75, 0.9, 0.95, 0.98, 0.99, 1])
display(cpu_quant)
print(f"only {(df.cpu > 0.1).sum()}/{len(df)} pods are using more than 10% CPU")
cpu_quant.plot(kind='bar')


# ## Resource planning
#
# Now we can try to pick a node flavor based on this information.
#
# We want to fit about 100 users on a node, since Kubernetes enforces a limit
# of 110 pods per node, and we have ~150-400 users at a time on mybinder.org.

# In[11]:


# goal, close to the kubernetes limit of 110 pods per node
users_per_node = 100


# We can start with a simple average-based estimate:

# In[12]:


mem_per_node = df.mem.mean() * users_per_node
cpu_per_node = df.cpu.mean() * users_per_node
print(f"   cpu: {cpu_per_node:.3f} cores")
print(f"memory: {mem_per_node / 1024:.3f}GB")


# While this works on average, outliers can cause problems if we plan too
# close to it.
#
# Instead, we can try planning based on percentiles, e.g. if we assume that
# the top 10% of outliers don't use more resources than the bottom 90%, we can
# plan based on the 90th percentile as 'average' or 'typical'.
#
# Here are some resource plans based on different percentiles of current usage:

# In[13]:


from IPython.display import display, Markdown

# close to the kubernetes limit of 110 pods per node
users_per_node = 100

for quantile in (0.9, 0.95, 0.98, 0.99):
    mem_per_user = mem_quant[quantile]
    cpu_per_user = cpu_quant[quantile]
    md = []
    md.append(f"## {int(100 * quantile)}th percentile")
    md.append(f"Assuming typical users stay below the {int(100 * quantile)}th percentile, we need:")
    md.append("")
    md.append(f"- Memory: **{mem_per_user:.0f}MB** per pod")
    md.append(f"- CPU: **{cpu_per_user:.3f} cores** per pod")
    md.append("")
    md.append(f"For {users_per_node} users, that means each node must have:")
    md.append("")
    md.append(f"- Memory: **{mem_per_user * users_per_node / 1024:.1f}GB** per node")
    md.append(f"- CPU: **{cpu_per_user * users_per_node:.1f} cores** per node")
    display(Markdown('\n'.join(md)))


# Or we can reverse it: pick a node flavor and see which percentile of each
# resource would need to be the 'average'.

# In[14]:


# memory (MB) per CPU for GCE n1 node flavors
ratios = {
    'standard': 3.75 * 1024,
    'highmem': 6.5 * 1024,
}
node_memory = {}
for cpu in 4, 8, 16:
    for flavor, ratio in ratios.items():
        mem = cpu * ratio
        node_memory[f'n1-{flavor}-{cpu}'] = mem
        cpu_quantile = (df.cpu <= cpu / users_per_node).sum() / len(df)
        mem_quantile = (df.mem <= mem / users_per_node).sum() / len(df)
        print(f"n1-{flavor}-{cpu}: {mem/1024:.0f}GB")
        print(f"  cpu: {100*cpu_quantile:.0f}%")
        print(f"  mem: {100*mem_quantile:.0f}%")


# So if we picked `n1-highmem-8`, we would be happy as long as the *average*
# user on a given node uses fewer resources than the current 98th percentile.
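# To make that comparison easier to scan, here is the same table as per-user
# budgets (a quick sketch; it only rearranges numbers already computed above,
# assuming `users_per_node` pods per node):

# In[ ]:


# per-user resource budget implied by each node flavor (sketch)
for flavor, mem in sorted(node_memory.items()):
    n_cpus = int(flavor.rsplit('-', 1)[1])  # flavor names end in the CPU count
    print(f"{flavor}: {mem / users_per_node:.0f}MB, "
          f"{n_cpus / users_per_node:.2f} cores per user")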
# ### Safety factor
#
# The next 'safety factor' test: how many users hitting the *limit* can we
# handle, assuming the rest of the users are 'typical'?
# Oversubscribing CPU isn't usually a big deal, but oversubscribing memory can
# be catastrophic, so we want to make sure it's pretty unlikely that a few
# users nearing the memory limit can consume all of a machine's memory.
#
# Let's say we use the 90th percentile as "typical": how many users hitting a
# given *limit* would it take to run out of memory, for each node flavor?

# In[15]:


import operator

import matplotlib.pyplot as plt

quantile = 0.9
mem_typical = mem_quant[quantile]
mem_limit = 2048
# total memory needed if n_hogs pods sit at the limit
# and the rest use the 'typical' amount
n_hogs = np.arange(0, 21)
total_memory = n_hogs * mem_limit + (users_per_node - n_hogs) * mem_typical
plt.plot(n_hogs, total_memory / 1024, label="memory needed")
plt.xlim(0, n_hogs[-1])
# horizontal line for each node flavor's total memory
for flavor, mem in sorted(
        node_memory.items(),
        key=operator.itemgetter(1),
        reverse=True):
    plt.plot([0, n_hogs[-1]], [mem / 1024] * 2, label=flavor)
plt.legend(loc=0)
plt.title(f"Memory needed, based on {int(100 * quantile)}th percentile and {mem_limit/1024:.0f}G limit")
plt.xlabel("Number of outliers")
plt.ylabel("GB")
plt.ylim(0, 100)


# From this chart, we can see that `n1-highmem-8` can withstand 18 pods sitting
# right at the limit of 2GB, as long as the remaining pods average below the
# 90th percentile.
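# We can cross-check the chart by solving for the break-even point directly
# (a quick sketch, using only the quantities above: a node overflows once
# `n * mem_limit + (users_per_node - n) * mem_typical` exceeds its memory):

# In[ ]:


# largest number of at-the-limit pods each flavor can absorb (sketch)
for flavor, mem in sorted(node_memory.items(), key=operator.itemgetter(1)):
    n_max = int((mem - users_per_node * mem_typical) // (mem_limit - mem_typical))
    print(f"{flavor}: up to {max(n_max, 0)} pods at the {mem_limit / 1024:.0f}GB limit")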