import numpy as np
import bqplot.pyplot as plt
from ipywidgets import VBox, FloatSlider
Let us define the random variables $X$ and $Y$ by $$ f(x) = x \frac{1 + x}{1 + x^2}, \qquad X \sim \mathcal{N}(0, 1), \quad Y = f(X) + \varepsilon, $$ where $\varepsilon \sim \mathcal{N}(0, 1/4)$ is independent of X
def f(x):
return (x + x ** 2) / (1 + x ** 2)
n = 1000
sigma = 0.5
X = np.random.randn(n)
Y = f(X) + sigma * np.random.randn(n)
plt.figure(title='Joint sample, and Conditional expecation for (X, Y)')
plt.scatter(X, Y, alpha=0.3)
mesh = np.linspace(-4, 4, 201)
plt.plot(mesh, f(mesh), linewidth=3)
plt.show()
A Jupyter Widget
def reg_non_param(x, bdwidth, x_sample, y_sample):
"""Values of the non-parametric regression of Y wrt X using a Gaussian kernel."""
def kern(u, x):
"""Gaussian kernel function"""
return np.exp(-(u[:, np.newaxis] - x) ** 2 / (2 * bdwidth ** 2))
return np.sum(kern(x_sample, x) * y_sample[:, np.newaxis], axis=0) \
/ np.sum(kern(x_sample, x), axis=0)
Plotting non-parametric regressions of $Y$ with respect to $X$ with different values of the bandwidth:
plt.figure(legend_location='bottom-right')
plt.scatter(X, Y, alpha=0.3)
plt.plot(mesh, reg_non_param(mesh, 0.1, X, Y), 'b', linewidth=3, labels=['bandwidth 0.1'], display_legend=True)
plt.plot(mesh, reg_non_param(mesh, 0.2, X, Y), 'r', linewidth=3, labels=['bandwidth 0.2'], display_legend=True)
plt.plot(mesh, reg_non_param(mesh, 0.5, X, Y), 'g', linewidth=3, labels=['bandwidth 0.5'], display_legend=True)
plt.show()
A Jupyter Widget
figure = plt.figure(title='Non-parametric regression')
scatter = plt.scatter(X, Y, alpha=0.3, enable_move=True)
regression = plt.plot(mesh, np.zeros(mesh.shape), 'b', linewidth=3)
slider = FloatSlider(min=0.05, max=2.0, value=1.0, step=0.05, description='bandwidth')
def update(change=None):
regression.y = reg_non_param(regression.x, slider.value, scatter.x, scatter.y)
slider.observe(update, names=['value'])
scatter.observe(update, names=['x', 'y'])
update()
VBox([figure, slider])
A Jupyter Widget
def basis(knots, x):
"""Values of order-1 B-spline basis functions."""
nb_knots = len(knots)
diag = np.identity(nb_knots)
res = np.empty((len(x), nb_knots))
for i in range(nb_knots):
res[:, i] = np.interp(x, knots, diag[i])
return res
basis_len = 10
knots = np.linspace(-3.5, 3.5, basis_len)
plt.figure(title='Order-0 B-splines')
plt.plot(mesh, basis(knots, mesh).T, linewidth=2)
plt.ylim(0.0, 2.0)
plt.show()
A Jupyter Widget
def reg_param_coeffs(knots, x_sample, y_sample):
"""Computes the coefficients of the P-L regression of y_sample wrt. x_sample."""
bis = basis(knots, x_sample)
var = bis.T.dot(bis)
covar = y_sample.dot(bis)
return np.linalg.lstsq(var, covar.T)[0]
def eval_piecewise_linear(x, knots, coeffs):
"""Eveluates the piecewise linear function at the specified x for the knots and coeffs.
"""
return np.interp(x, knots, coeffs)
plt.figure()
plt.scatter(X, Y, alpha=0.3)
knots1 = np.linspace(-3.0, 3.0, 10)
plt.plot(mesh, eval_piecewise_linear(mesh, knots1, reg_param_coeffs(knots1, X, Y)), 'b', linewidth=3)
knots2 = np.linspace(-3.0, 3.0, 20)
plt.plot(mesh, np.interp(mesh, knots2, reg_param_coeffs(knots2, X, Y)), 'r', linewidth=3)
plt.title('Different collections of knots')
plt.show()
A Jupyter Widget
def second_derivative_on_dirac_basis(knots):
"""
Computes the coefficients of the second derivative of the basis functions
on the Dirac comb.
"""
nb_knots = len(knots)
res = np.zeros((nb_knots, nb_knots))
if nb_knots > 1:
res[0, 0] = -1.0 / (knots[1] - knots[0])
res[0, 1] = 1.0 / (knots[1] - knots[0])
for i in range(1, nb_knots - 1):
res[i, i - 1] = (1.0 / (knots[i] - knots[i - 1]))
res[i, i] = -(1.0 / (knots[i] - knots[i - 1]) + 1.0 / (knots[i + 1] - knots[i]))
res[i, i + 1] = 1.0 / (knots[i + 1] - knots[i])
if nb_knots > 1:
res[nb_knots - 1, nb_knots - 2] = 1.0 / (knots[nb_knots - 1] - knots[nb_knots - 2])
res[nb_knots - 1, nb_knots - 1] = -1.0 / (knots[nb_knots - 1] - knots[nb_knots - 2])
return res
def dirac_inner_product(knots, coeffs1, coeffs2):
"""
Equivalent to the finite-difference approximation for the second derivative.
"""
nb_knots = len(knots)
res = 0.0
for i in range(nb_knots):
res += 0.5 * (coeffs1[i] * coeffs2[i] + coeffs1[i - 1] * coeffs2[i - 1]) / (knots[i] - knots[i - 1])
return res
def tikhonov_matrix(knots):
"""Computes the second-order Tikhonov matrix of the B-splines corresponding to specified knots.
Note
----
The specified array of knots must be non-empty and increasingly sorted.
"""
basis_len = len(knots)
res = np.zeros((basis_len, basis_len))
coeffs_on_dirac_basis = second_derivative_on_dirac_basis(knots)
influence_order = 2
for i in range(basis_len):
min_j = max(0, i - influence_order)
max_j = min(basis_len, i + influence_order + 1)
for j in range(min_j, max_j):
res[i, j] = dirac_inner_product(knots, coeffs_on_dirac_basis[i], coeffs_on_dirac_basis[j])
return res
def penalized_pl_regression(knots, x_sample, y_sample, tikhonov_factor):
"""Compute the second-order penalized P-L regression of y_sample wrt. x_sample.
"""
bis = basis(knots, x_sample)
var = (bis.T).dot(bis) / len(x_sample)
covar = y_sample.dot(bis) / len(x_sample)
tikho = tikhonov_matrix(knots)
return np.linalg.lstsq(var + tikhonov_factor * tikho, covar.T)[0]
plt.figure(title='Testing multiple values for Tikhonov factor')
plt.scatter(X, Y, alpha=0.3)
knots = np.linspace(-3.0, 3.0, 25)
plt.plot(mesh, eval_piecewise_linear(mesh, knots, penalized_pl_regression(knots, X, Y, 0.01)), 'r', linewidth=3)
plt.plot(mesh, eval_piecewise_linear(mesh, knots, penalized_pl_regression(knots, X, Y, 0.1)), 'g', linewidth=3)
plt.show()
A Jupyter Widget
figure = plt.figure(title='Non-parametric regression')
scatter_s = plt.scatter(X, Y, alpha=0.3, enable_move=True)
spline = plt.plot(mesh, np.zeros(mesh.shape), 'b', linewidth=3)
tikhonov = FloatSlider(min=0.02, max=1.0, value=0.5, step=0.01, description='Tikhonov')
def update_spline(change=None):
spline.y = eval_piecewise_linear(spline.x, knots,
penalized_pl_regression(knots, scatter_s.x, scatter_s.y, tikhonov.value))
tikhonov.observe(update_spline, names=['value'])
scatter_s.observe(update_spline, names=['x', 'y'])
update_spline()
VBox([figure, tikhonov])
A Jupyter Widget