# kubectl top pod

Need to reimplement part of `kubectl top pod`, because the Python client doesn't support the metrics endpoint.

Exploring current resource metrics of mybinder.org, and drawing some resource-planning conclusions based on the results.
Set kubernetes context and namespace
# kubernetes context and namespace to inspect (mybinder.org production)
context = 'prod'
namespace = 'prod'
# render matplotlib figures inline in the notebook
%matplotlib inline
Connect to kubernetes
import kubernetes.config
import kubernetes.client
# Load credentials from the local kubeconfig for the selected context,
# then build a CoreV1 API client for pod queries.
kubernetes.config.load_kube_config(context=context)
kube = kubernetes.client.CoreV1Api()
Get the current pods as a dict:
# Fetch all singleuser-server pods (one per active user session).
pods = kube.list_namespaced_pod(
namespace,
label_selector="app=jupyterhub,component=singleuser-server").items
# make it a dict by name, so metrics entries can be matched to pods below
pods = {pod.metadata.name: pod for pod in pods}
Make the API request to get pod metrics
import json
api_client = kubernetes.client.ApiClient()
# kube client doesn't support metrics yet
# from https://github.com/kubernetes-client/python/issues/528
# Call the metrics.k8s.io endpoint directly through the generic ApiClient.
# With _preload_content=False, `resp` is the raw HTTP response object, so
# we read and JSON-decode the body ourselves below.
# NOTE(review): response_type is presumably ignored when content isn't
# preloaded — confirm against the kubernetes-client docs.
resp, status, headers = api_client.call_api(
f'/apis/metrics.k8s.io/v1beta1/namespaces/{namespace}/pods',
'GET',
auth_settings=['BearerToken'],
response_type='json',
_preload_content=False)
metrics = json.loads(resp.read().decode('utf8'))
Parse the metrics into a pandas data frame
import re
import pandas as pd
def parse_memory(mem_s):
    """Parse a kubernetes memory quantity, e.g. '512Ki', '100Mi', '2Gi'.

    Returns the amount as an int number of MB (1e6 bytes).

    Raises ValueError for suffixes we don't expect from the metrics API.
    """
    # Suffix is optional: a bare number means bytes.
    num_s, suffix = re.match(r'(\d+)([^\d]*)', mem_s).groups()
    num = int(num_s)
    # Kubernetes Ki/Mi/Gi/Ti are *binary* prefixes (Ki = 1024 bytes, ...).
    # The previous version mixed them with decimal factors (1e3), which
    # under-reported by ~5% per Mi and contradicted the documented MB unit.
    factors = {'': 1, 'Ki': 2 ** 10, 'Mi': 2 ** 20, 'Gi': 2 ** 30, 'Ti': 2 ** 40}
    if suffix not in factors:
        raise ValueError("unsupported memory suffix: %r" % suffix)
    return int(num * factors[suffix] / 1e6)
def parse_cpu(cpu_s):
    """Parse a kubernetes CPU quantity, e.g. '250m', '2', '163840n'.

    Returns CPU in cores as a float (or int for a bare core count).

    Raises ValueError for unrecognized suffixes.
    """
    num_s, suffix = re.match(r'(\d+)([^\d]*)', cpu_s).groups()
    num = int(num_s)
    # metrics-server reports millicores (m), and newer versions report
    # microcores (u) or nanocores (n); the old code only handled 'm'.
    scales = {'': 1, 'm': 1e-3, 'u': 1e-6, 'n': 1e-9}
    try:
        return num * scales[suffix]
    except KeyError:
        raise ValueError("unsupported cpu suffix: %r" % suffix) from None
# Build one row per user pod: (pod name, repo image slug, memory MB, CPU cores).
data = []
for metric in metrics['items']:
    pod_name = metric['metadata']['name']
    if pod_name not in pods:
        # metrics can include pods that have already gone away; skip them
        continue
    # Sum usage across all containers in the pod.
    # (The old `pod = pods[pod_name]` lookup was dead code — removed.)
    mem = sum(parse_memory(c['usage']['memory']) for c in metric['containers'])
    cpu = sum(parse_cpu(c['usage']['cpu']) for c in metric['containers'])
    # Pod names look like jupyter-<escaped image>-<hash>, where '-2d'
    # escapes a literal '-' in the image name.
    image = pod_name.split('-', 1)[1].rsplit('-', 1)[0].replace('-2d', '-')
    data.append([pod_name, image, mem, cpu])
df = pd.DataFrame(data, columns=['pod', 'image', 'mem', 'cpu'])
df = df.sort_values('mem')
df
pod | image | mem | cpu | |
---|---|---|---|---|
50 | jupyter-binder-2dexamples-2dr-2daezilgbm | binder-examples-r | 47 | 0.000 |
55 | jupyter-rceatpitt-2ddata-2d-2dics-2dsummer-2d2... | rceatpitt-data--ics-summer-2018 | 47 | 0.000 |
157 | jupyter-quantstack-2dxeus-2dcling-2dis1vzteh | quantstack-xeus-cling | 47 | 0.000 |
192 | jupyter-binder-2dexamples-2dr-2dczjbj4eh | binder-examples-r | 47 | 0.000 |
144 | jupyter-gap-2dsystem-2dgap-2ddocker-2dbinder-2... | gap-system-gap-docker-binder | 48 | 0.000 |
216 | jupyter-jupyterlab-2djupyterlab-2ddemo-2d6ltpksmq | jupyterlab-jupyterlab-demo | 48 | 0.000 |
161 | jupyter-bokeh-2dbokeh-2dnotebooks-2dlofg2p6t | bokeh-bokeh-notebooks | 48 | 0.000 |
35 | jupyter-tobyhodges-2ditpp-2dmo4vzylv | tobyhodges-itpp | 49 | 0.000 |
65 | jupyter-ipython-2dipython-2din-2ddepth-2d6mmep52k | ipython-ipython-in-depth | 49 | 0.000 |
147 | jupyter-ipython-2dipython-2din-2ddepth-2dqi9sbwrs | ipython-ipython-in-depth | 49 | 0.000 |
24 | jupyter-bokeh-2dbokeh-2dnotebooks-2dvscholq7 | bokeh-bokeh-notebooks | 49 | 0.000 |
148 | jupyter-bokeh-2dbokeh-2dnotebooks-2dc3lr5w04 | bokeh-bokeh-notebooks | 49 | 0.000 |
84 | jupyter-bokeh-2dbokeh-2dnotebooks-2d1xctxtux | bokeh-bokeh-notebooks | 49 | 0.000 |
173 | jupyter-jupyterlab-2djupyterlab-2ddemo-2detwer7c0 | jupyterlab-jupyterlab-demo | 49 | 0.000 |
226 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dx2vstrdq | jupyterlab-jupyterlab-demo | 49 | 0.000 |
101 | jupyter-ipython-2dipython-2din-2ddepth-2dh8yooduj | ipython-ipython-in-depth | 49 | 0.000 |
10 | jupyter-jupyterlab-2djupyterlab-2ddemo-2df54tenot | jupyterlab-jupyterlab-demo | 49 | 0.000 |
92 | jupyter-ipython-2dipython-2din-2ddepth-2dh13j4kew | ipython-ipython-in-depth | 49 | 0.000 |
94 | jupyter-quantstack-2dxeus-2dcling-2dfo112hyt | quantstack-xeus-cling | 49 | 0.000 |
182 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dxvjwsw4t | jupyterlab-jupyterlab-demo | 49 | 0.000 |
224 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dvdrq9wyi | jupyterlab-jupyterlab-demo | 50 | 0.000 |
49 | jupyter-jupyterlab-2djupyterlab-2ddemo-2d7rxvwopd | jupyterlab-jupyterlab-demo | 50 | 0.000 |
63 | jupyter-rceatpitt-2ddata-2d-2dics-2dsummer-2d2... | rceatpitt-data--ics-summer-2018 | 50 | 0.000 |
82 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dnwks54yk | jupyterlab-jupyterlab-demo | 50 | 0.000 |
118 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dm9h2bfqr | jupyterlab-jupyterlab-demo | 50 | 0.000 |
96 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dpxl6rf47 | jupyterlab-jupyterlab-demo | 50 | 0.001 |
58 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dg4cbibz4 | jupyterlab-jupyterlab-demo | 50 | 0.000 |
88 | jupyter-jupyterlab-2djupyterlab-2ddemo-2ddyg9a6se | jupyterlab-jupyterlab-demo | 50 | 0.000 |
62 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dxat60biv | jupyterlab-jupyterlab-demo | 50 | 0.000 |
89 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dwouzbm9f | jupyterlab-jupyterlab-demo | 50 | 0.000 |
... | ... | ... | ... | ... |
42 | jupyter-quantstack-2dxeus-2dcling-2dcuwn5h8y | quantstack-xeus-cling | 164 | 0.000 |
172 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dwtq9skqe | jupyterlab-jupyterlab-demo | 167 | 0.005 |
8 | jupyter-marvinf95-2dvtna-5ffrontend-2dalwjmk3p | marvinf95-vtna-5ffrontend | 170 | 0.000 |
77 | jupyter-marvinf95-2dvtna-5ffrontend-2dawr1isub | marvinf95-vtna-5ffrontend | 170 | 0.000 |
47 | jupyter-ipython-2dipython-2din-2ddepth-2d9xpax4cw | ipython-ipython-in-depth | 179 | 0.986 |
179 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dbnkqj4uz | jupyterlab-jupyterlab-demo | 183 | 0.007 |
233 | jupyter-ipython-2dipython-2din-2ddepth-2djeh0uin7 | ipython-ipython-in-depth | 189 | 0.020 |
156 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dp4z2wsol | jupyterlab-jupyterlab-demo | 202 | 0.002 |
83 | jupyter-geoscixyz-2dem-2dapps-2d4122t7cz | geoscixyz-em-apps | 210 | 0.000 |
139 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dgipsebhd | jupyterlab-jupyterlab-demo | 217 | 0.002 |
59 | jupyter-rasahq-2drasa-5fcore-2dh4luhbhi | rasahq-rasa-5fcore | 227 | 0.038 |
136 | jupyter-jupyterlab-2djupyterlab-2ddemo-2doydlrvqv | jupyterlab-jupyterlab-demo | 239 | 0.020 |
18 | jupyter-kelvin1020-2dorbit-2dj4j8hn62 | kelvin1020-orbit | 256 | 0.000 |
221 | jupyter-ines-2dspacy-2dio-2dbinder-2duuf1m6gc | ines-spacy-io-binder | 259 | 0.000 |
199 | jupyter-ines-2dspacy-2dio-2dbinder-2duhl1o33g | ines-spacy-io-binder | 260 | 0.000 |
152 | jupyter-ines-2dspacy-2dio-2dbinder-2d0hgzwktu | ines-spacy-io-binder | 260 | 0.096 |
17 | jupyter-geoscixyz-2dem-2dapps-2dcjcu5swq | geoscixyz-em-apps | 261 | 0.000 |
115 | jupyter-ines-2dspacy-2dio-2dbinder-2drtf2t9zx | ines-spacy-io-binder | 262 | 0.000 |
31 | jupyter-ines-2dspacy-2dio-2dbinder-2dt43ge31h | ines-spacy-io-binder | 263 | 0.000 |
196 | jupyter-ipython-2dipython-2din-2ddepth-2dhus1b7hn | ipython-ipython-in-depth | 264 | 0.001 |
87 | jupyter-ines-2dspacy-2dio-2dbinder-2diz93tzma | ines-spacy-io-binder | 264 | 0.000 |
186 | jupyter-ipython-2dipython-2din-2ddepth-2dz34g2z1i | ipython-ipython-in-depth | 300 | 0.006 |
129 | jupyter-ines-2dspacy-2dio-2dbinder-2d848kbxcs | ines-spacy-io-binder | 340 | 0.000 |
33 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dzz7lx2nk | jupyterlab-jupyterlab-demo | 385 | 0.002 |
29 | jupyter-ines-2dspacy-2dio-2dbinder-2dcs5dufd0 | ines-spacy-io-binder | 487 | 0.000 |
22 | jupyter-spencerpark-2dijava-2dbinder-2dn7eoo4yg | spencerpark-ijava-binder | 559 | 0.008 |
60 | jupyter-jupyterlab-2djupyterlab-2ddemo-2dsqcso5wh | jupyterlab-jupyterlab-demo | 583 | 0.000 |
109 | jupyter-jupyterlab-2djupyterlab-2ddemo-2d59ayfypm | jupyterlab-jupyterlab-demo | 627 | 0.001 |
200 | jupyter-ipython-2dipython-2din-2ddepth-2dkpa8bja2 | ipython-ipython-in-depth | 969 | 1.006 |
51 | jupyter-ipython-2dipython-2din-2ddepth-2divz6bgqb | ipython-ipython-in-depth | 978 | 0.060 |
239 rows × 4 columns
Plot the pod memory distribution, both as a histogram and cumulative density function, so that we can see where the most pods are, and what fraction of pods would fit under a given limit.
# Histogram of per-pod memory with a cumulative-fraction curve on a
# secondary y-axis, so we can read off what fraction of pods would fit
# under a given memory limit.
nbins = 100
ax = df.mem.hist(bins=nbins)
counts = pd.cut(df.mem, nbins).value_counts().sort_index()
# Use the *right* bin edges for the CDF: the cumulative count through a
# bin is the number of pods at or below that bin's right edge (the old
# code used left edges, shifting the curve one bin early).
right = counts.index.categories.right
ax_right = ax.twinx()
ax.set_xlabel("pod memory (MB)")
ax.set_ylabel("pods")
ax.set_xlim(0, None)
ax.set_title("Memory distribution")
ax_right.set_ylabel("cumulative pod fraction")
# 'C1' is the second color of the default property cycle; peeking at
# ax._get_lines.prop_cycler was private API and is gone in matplotlib 3.8.
ax_right.plot(right, counts.cumsum() / len(df), c='C1', label="cumulative")
ax_right.grid(False)
for threshold in [1000, 800, 512, 256]:
    print(f"users over {threshold} MB: {(df['mem'] > threshold).sum()}")
print(f"users under 100MB: {(df['mem'] <= 100).sum()}")
users over 1000 MB: 0 users over 800 MB: 2 users over 512 MB: 5 users over 256 MB: 17 users under 100MB: 171
Same data again as quantiles. This shows directly what limit would be required to fit a given fraction of pods.
from IPython.display import display

# Memory usage at selected quantiles: what limit would cover a given
# fraction of current pods?
percentiles = [0.5, 0.75, 0.9, 0.95, 0.98, 0.99, 1]
mem_quant = df.mem.quantile(percentiles)
display(mem_quant)
mem_quant.plot(kind='bar')
print(f"90% of pods use less than {int(mem_quant[0.9])}MB")
print(f"Only 5% of pods would be affected if the limit were {int(mem_quant[0.95])}MB")
0.50 87.00 0.75 113.00 0.90 184.20 0.95 262.10 0.98 504.28 0.99 610.28 1.00 978.00 Name: mem, dtype: float64
90% of pods use less than 184MB Only 5% of pods would be affected if the limit were 262MB
Similar plots again for CPU show that ~all pods are idle.
# CPU histogram and quantiles: nearly every pod is idle.
ax = df.cpu.hist(bins=50)
ax.set_title("CPU")
ax.set_xlabel("pod CPU load")
ax.set_ylabel("# pods")
ax.set_xlim(0, None);
cpu_quant = df.cpu.quantile([0.5, 0.75, 0.9, 0.95, 0.98, 0.99, 1])
display(cpu_quant)
busy = (df.cpu > 0.1).sum()
print(f"only {busy}/{len(df)} pods are using more than 10% CPU")
cpu_quant.plot(kind='bar')
0.50 0.00000 0.75 0.00150 0.90 0.00420 0.95 0.02000 0.98 0.06864 0.99 0.96884 1.00 1.00600 Name: cpu, dtype: float64
only 4/239 pods are using more than 10% CPU
<matplotlib.axes._subplots.AxesSubplot at 0x10b982da0>
Now we can try to pick a node flavor based on this information.
We want to fit about 100 users on a node, since Kubernetes enforces a 110 pod limit per node, and we have ~150-400 users at a time on mybinder.org.
# goal, close to kubernetes limit of 110 pods per node
# (with ~150-400 concurrent users, that's a few nodes' worth)
users_per_node = 100
We can start with a simple average-based estimate:
# Naive sizing: scale the *average* per-pod usage up to a full node.
mem_per_node = users_per_node * df.mem.mean()
cpu_per_node = users_per_node * df.cpu.mean()
print(f" cpu: {cpu_per_node:.3f} cores")
print(f"memory: {mem_per_node / 1024:.3f}GB")
cpu: 1.887 cores memory: 10.827GB
While this averages out, outliers can cause problems if we plan too close to it.
Instead, we can try planning based on percentiles. E.g. if we assume that the top 10% outliers don't use more resources than the bottom 90%, we can plan based on the 90th percentile as 'average' or 'typical'.
Here are some resource plans based on different percentiles of current usage:
from IPython.display import display, Markdown

# close to kubernetes limit of 110 pods per node
users_per_node = 100
# Render one markdown section per percentile-based resource plan.
for quantile in (0.9, 0.95, 0.98, 0.99):
    mem_per_user = mem_quant[quantile]
    cpu_per_user = cpu_quant[quantile]
    pct = int(100 * quantile)
    section = [
        f"## {pct}th percentile",
        f"Assuming typical users stay below {pct}th percentile, we need:",
        "",
        f"- Memory: **{mem_per_user:.0f}MB** per pod",
        f"- CPU: **{cpu_per_user:.3f} cores** per pod",
        "",
        f"For {users_per_node} users, that means each node must have:",
        "",
        f"- Memory: **{mem_per_user * users_per_node / 1024:.1f}GB** per node",
        f"- CPU: **{cpu_per_user * users_per_node:.1f} cores** per node",
    ]
    display(Markdown('\n'.join(section)))
Assuming typical users stay below 90th percentile, we need:
For 100 users, that means each node must have:
Assuming typical users stay below 95th percentile, we need:
For 100 users, that means each node must have:
Assuming typical users stay below 98th percentile, we need:
For 100 users, that means each node must have:
Assuming typical users stay below 99th percentile, we need:
For 100 users, that means each node must have:
Or we can reverse it. We can pick a node flavor and see which percentile of each resource would need to be 'average'.
# GCE node flavor memory-to-vCPU ratios, in MB per core.
ratios = {
    'standard': 3.75 * 1024,
    'highmem': 6.5 * 1024,
}
# flavor name -> total node memory (MB); reused for the plot below
node_memory = {}
# (Removed a stray no-op `np.cumsum(df.cpu)` expression that was left here.)
for cpu in 4, 8, 16:
    for flavor, ratio in ratios.items():
        mem = cpu * ratio
        node_memory[f'n1-{flavor}-{cpu}'] = mem
        # Which percentile of current usage would have to be the 'average'
        # for users_per_node pods to fit in an even share of this node.
        cpu_quantile = (df.cpu <= cpu / users_per_node).sum() / len(df)
        mem_quantile = (df.mem <= mem / users_per_node).sum() / len(df)
        print(f"n1-{flavor}-{cpu}: {mem/1024:.0f}GB")
        print(f" cpu: {100*cpu_quantile:.0f}%")
        print(f" mem: {100*mem_quantile:.0f}%")
n1-standard-4: 15GB cpu: 96% mem: 87% n1-highmem-4: 26GB cpu: 96% mem: 96% n1-standard-8: 30GB cpu: 98% mem: 97% n1-highmem-8: 52GB cpu: 98% mem: 98% n1-standard-16: 60GB cpu: 98% mem: 99% n1-highmem-16: 104GB cpu: 98% mem: 100%
So if we picked **n1-highmem-8**, we would be happy as long as the average user on a given node uses fewer resources than the current 98th percentile.
The next 'safety factor' test is how many users hitting the limit we can handle, assuming the rest of the users are 'typical'. Oversubscribing CPU isn't usually a big deal, but oversubscribing memory can be catastrophic, so we want to make sure it's pretty unlikely that a few users nearing the memory limit can consume all of a machine.
Let's say we use the 90th percentile as "typical", how many users hitting a given limit would it take to run out of memory, for each node flavor?
import operator
import matplotlib.pyplot as plt

# How many pods sitting right at the memory limit ("hogs") can each node
# flavor absorb, if every other pod stays at the typical (90th percentile)
# usage level?
quantile = 0.9
mem_typical = mem_quant[quantile]
mem_limit = 2048  # per-pod memory limit, in MB
n_hogs = np.arange(0, 21)
# total memory = hogs at the full limit + everyone else at typical usage
total_memory = mem_limit * n_hogs + mem_typical * (users_per_node - n_hogs)
plt.plot(n_hogs, total_memory / 1024, label="memory needed")
plt.xlim(0, n_hogs[-1])
# One horizontal line per flavor, largest first so the legend is ordered.
by_capacity = sorted(node_memory.items(), key=operator.itemgetter(1), reverse=True)
for flavor, mem in by_capacity:
    plt.plot([0, n_hogs[-1]], [mem / 1024] * 2, label=flavor)
plt.legend(loc=0)
plt.title(f"Memory needed, based on {int(100 * quantile)}th percentile and {mem_limit/1024:.0f}G limit")
plt.xlabel("Number of outliers")
plt.ylabel("GB")
plt.ylim(0, 100)
(0, 100)
From this chart, we can see that **n1-highmem-8** can withstand 18 pods sitting right at the limit of 2GB, as long as the remaining pods average below the 90th percentile.