Finding a “Kneedle” in a Haystack: Detecting Knee Points in System Behavior
Ville Satopa, Jeannie Albrecht, David Irwin, and Barath Raghavan
https://www1.icsi.berkeley.edu/~barath/papers/kneedle-simplex11.pdf
import numpy as np
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
def figure2():
x = np.linspace(0.0, 1, 10)
with np.errstate(divide='ignore'):
return x,np.true_divide(-1, x + 0.1) + 5
x,y = figure2()
if not np.array_equal(np.array(x), np.sort(x)):
raise ValueError('x needs to be sorted')
from scipy.interpolate import interp1d
N = len(x)
# Ds = the finite set of x- and y-values that define a smooth curve,
# one that has been fit to a smoothing spline.
uspline = interp1d(x, y)
Ds_y = uspline(x)
plt.plot(x, Ds_y);
def normalize(a):
"""return the normalized input array"""
return (a - min(a)) / (max(a) - min(a))
# x and y normalized to unit square
x_normalized = normalize(x)
y_normalized = normalize(Ds_y)
# the difference curve
y_difference = y_normalized - x_normalized
x_difference = x_normalized.copy()
plt.title("Normalized spline & difference curve");
plt.plot(x_normalized, y_normalized);
plt.plot(x_difference, y_difference);
of the difference curve
from scipy.signal import argrelextrema
# local maxima for knees
maxima_indices = argrelextrema(y_difference, np.greater)[0]
x_difference_maxima = x_difference[maxima_indices]
y_difference_maxima = y_difference[maxima_indices]
# local minima
minima_indices = argrelextrema(y_difference, np.less)[0]
x_difference_minima = x_difference[minima_indices]
y_difference_minima = y_difference[minima_indices]
plt.title("local maxima in difference curve");
plt.plot(x_normalized, y_normalized);
plt.plot(x_difference, y_difference);
plt.hlines(y_difference_maxima, plt.xlim()[0], plt.xlim()[1]);
# Sensitivity parameter S
# smaller values detect knees quicker
S = 1.0
Tmx = y_difference_maxima - (S * np.abs(np.diff(x_normalized).mean()))
If any difference value (xdj, ydj), where j > i, drops below the threshold y = T|mxi
for (x|mxi, y|mxi) before the
next local maximum in the difference curve is reached,
Kneedle declares a knee at the x-value of the corresponding
local maximum x = x|xi.
If the difference values reach
a local minimum and starts to increase before y = T|mxi
is reached, we reset the threshold value to 0 and wait for
another local maximum to be reached.
# artificially place a local max at the last item in the x_difference array
maxima_indices = np.append(maxima_indices, len(x_difference) - 1)
minima_indices = np.append(minima_indices, len(x_difference) - 1)
# placeholder for which threshold region i is located in.
maxima_threshold_index = 0
minima_threshold_index = 0
curve = 'concave'
direction = 'increasing'
all_knees = set()
all_norm_knees = set()
# traverse the difference curve
for idx, i in enumerate(x_difference):
# reached the end of the curve
if i == 1.0:
break
# values in difference curve are at or after a local maximum
if idx >= maxima_indices[maxima_threshold_index]:
threshold = Tmx[maxima_threshold_index]
threshold_index = idx
maxima_threshold_index += 1
# values in difference curve are at or after a local minimum
if idx >= minima_indices[minima_threshold_index]:
threshold = 0.0
minima_threshold_index += 1
# Do not evaluate values in the difference curve before the first local maximum.
if idx < maxima_indices[0]:
continue
# evaluate the threshold
if y_difference[idx] < threshold:
if curve == 'convex':
if direction == 'decreasing':
knee = x[threshold_index]
all_knees.add(knee)
norm_knee = x_normalized[threshold_index]
all_norm_knees.add(norm_knee)
else:
knee = x[-(threshold_index + 1)]
all_knees.add(knee)
norm_knee = x_normalized[-(threshold_index + 1)]
all_norm_knees.add(norm_knee)
elif curve == 'concave':
if direction == 'decreasing':
knee = x[-(threshold_index + 1)]
all_knees.add(knee)
norm_knee = x_normalized[-(threshold_index + 1)]
all_norm_knees.add(norm_knee)
else:
knee = x[threshold_index]
all_knees.add(knee)
norm_knee = x_normalized[threshold_index]
all_norm_knees.add(norm_knee)
plt.xticks(np.arange(0,1.1,0.1))
plt.plot(x_normalized, y_normalized);
plt.plot(x_difference, y_difference);
plt.hlines(Tmx[0], plt.xlim()[0], plt.xlim()[1], colors='g', linestyles='dashed');
plt.vlines(x_difference_maxima, plt.ylim()[0], plt.ylim()[1], colors='r', linestyles='dashed');
The vertical, red dashed line represents the x value of the knee point. The horizontal greeb dashed line represents the threshold value.
knee
0.2222222222222222
# normalized x value where the knee was determined
norm_knee
0.2222222222222222