Finding a “Kneedle” in a Haystack: Detecting Knee Points in System Behavior
Ville Satopa, Jeannie Albrecht, David Irwin, and Barath Raghavan
https://www1.icsi.berkeley.edu/~barath/papers/kneedle-simplex11.pdf
import numpy as np
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
def figure2():
x = np.linspace(0.0, 1, 10)
with np.errstate(divide='ignore'):
return x,np.true_divide(-1, x + 0.1) + 5
x,y = figure2()
if not np.array_equal(np.array(x), np.sort(x)):
raise ValueError('x needs to be sorted')
from scipy.interpolate import interp1d,UnivariateSpline,InterpolatedUnivariateSpline
N = len(x)
# Ds = the finite set of x- and y-values that define a smooth curve,
# one that has been fit to a smoothing spline.
uspline = interp1d(x,y)
Ds_x = np.linspace(np.min(x), np.max(x), N)
Ds_y = uspline(Ds_x)
plt.plot(x,y);
plt.plot(Ds_x,Ds_y);
def normalize(a):
"""return the normalized input array"""
return (a - min(a)) / (max(a) - min(a))
# x and y normalized to unit square
xsn = normalize(Ds_x)
ysn = normalize(Ds_y)
# the difference curve
xd = xsn
yd = ysn - xsn
plt.title("Normalized spline & difference curve");
plt.plot(xsn,ysn);
plt.plot(xd,yd);
of the difference curve
from scipy.signal import argrelextrema
# local maxima for knees
xmx_idx = argrelextrema(yd, np.greater)[0]
xmx = xd[xmx_idx]
ymx = yd[xmx_idx]
#Dmx = np.stack((xmx,ymx))
# local minima
xmn_idx = argrelextrema(yd, np.less)[0]
xmn = xd[xmn_idx]
ymn = yd[xmn_idx]
plt.title("local maxima in difference curve");
plt.plot(xsn,ysn);
plt.plot(xd,yd);
plt.hlines(ymx, plt.xlim()[0], plt.xlim()[1]);
# Sensitivity parameter S
# smaller values detect knees quicker
S = 1.0
def threshold(ymx_i):
return ymx_i - (S * np.diff(xsn).mean())
Tmx = threshold(ymx)
Tmx
array([ 0.42528736])
If any difference value (xdj, ydj), where j > i, drops below the threshold y = T|mxi
for (x|mxi, y|mxi) before the
next local maximum in the difference curve is reached,
Kneedle declares a knee at the x-value of the corresponding
local maximum x = x|xi.
If the difference values reach
a local minimum and starts to increase before y = T|mxi
is reached, we reset the threshold value to 0 and wait for
another local maximum to be reached.
mxmx_iter = np.arange(xmx_idx[0], len(xsn))
xmx_idx_iter = np.append(xmx_idx, len(xsn))
knee_, norm_knee_, knee_x = 0.0, 0.0, None
for mxmx_i in range(len(xmx_idx_iter)):
# stopping criteria for exhasuting array
if mxmx_i == len(xmx_idx_iter) - 1:
break
# indices between maxima/minima
idxs = ( mxmx_iter > xmx_idx_iter[mxmx_i] ) * ( mxmx_iter < xmx_idx_iter[mxmx_i+1] )
between_local_mx = mxmx_iter[np.where(idxs)]
for j in between_local_mx:
if j in xmn_idx:
# reached a minima, x indices are unique
# only need to check if j is a min
if yd[j + 1] > yd[j]:
Tmx[mxmx_i] = 0
elif yd[j + 1] <= yd[j]:
print('If this is a minima, how would you ever get here: {}'.format(j,knee))
if yd[j] < Tmx[mxmx_i]:
if not knee_x:
knee_x = j
# declare a knee
knee = x[xmx_idx[mxmx_i]]
plt.xticks(np.arange(0,1.1,0.1))
plt.plot(xsn,ysn);
plt.plot(xd,yd);
plt.hlines(Tmx[0], plt.xlim()[0], plt.xlim()[1], colors='g', linestyles='dashed');
plt.vlines(xmx, plt.ylim()[0], plt.ylim()[1], colors='r', linestyles='dashed');
The vertical, red dashed line represents the x value of the knee point. The horizontal greeb dashed line represents the threshold value.
knee
0.22222222222222221
# normalized x value where the knee was determined
xsn[knee_x]
0.55555555555555558