import numpy as np import matplotlib.pyplot as plt from scipy.special import huber x_test = np.linspace(-5,5,500) plt.figure(figsize=(12,6), dpi=100) plt.plot(x_test,x_test**2, label='Quadratic') plt.plot(x_test,2*huber(1,x_test), linewidth=4, alpha=0.5, label='Huber') plt.axis('equal') plt.legend() plt.ylim(0,4) plt.grid() plt.show() np.random.seed(7030) d = 300 n = int(1.5*d) theta_true = np.random.randn(d) b_true = np.random.randn() X = np.random.randn(n,d) v = 1*np.random.randn(n) onev = np.ones(n) y_clean = X.dot(theta_true) y_corrupted = y_clean + v p = 0.08 factor = 2*np.random.binomial(1, 1-p, size=(n,)) - 1 y_flipped = factor*y_corrupted plt.figure(figsize=(14,8), dpi=100) plt.plot(y_clean, alpha=0.8, label='clean') plt.plot(y_corrupted, alpha=0.8, label='corrupted') plt.plot(y_flipped, alpha=0.8, label='8% flipped') plt.legend(), plt.grid() plt.show() import cvxpy as cp TESTS = 100 lsq_data = np.zeros(TESTS) huber_data = np.zeros(TESTS) prescient_data = np.zeros(TESTS) p_vals = np.linspace(0, 0.15, TESTS) for idx, p in enumerate(p_vals): print (idx, end=' ') if idx%20==19: print () # Generate the sign changes. factor = 2*np.random.binomial(1, 1-p, size=(n,)) - 1 y = factor*(X.dot(theta_true)+onev*b_true + v) # Form and solve a standard regression problem. theta = cp.Variable(d) b = cp.Variable() fit = cp.norm(theta - theta_true)/cp.norm(theta_true) cost = cp.norm(X@theta + onev*b - y) prob = cp.Problem(cp.Minimize(cost)) prob.solve() lsq_data[idx] = fit.value # Form and solve a prescient regression problem, # i.e., where the sign changes are known. cost = cp.norm(cp.multiply(factor, X@theta + onev*b) - y) prob = cp.Problem(cp.Minimize(cost)) prob.solve() prescient_data[idx] = fit.value # Form and solve the Huber regression problem. cost = cp.sum(cp.huber(X@theta + onev*b - y, 1)) prob = cp.Problem(cp.Minimize(cost)) prob.solve() huber_data[idx] = fit.value plt.figure(figsize=(14,8), dpi=100) plt.plot(p_vals, lsq_data, label='Least squares') plt.plot(p_vals, huber_data, label='Huber') plt.plot(p_vals, prescient_data, label='Prescient') plt.ylabel(r'$||\theta - \theta^\mathrm{true} ||_2/ ||\theta^\mathrm{true} ||_2$') plt.xlabel('p') plt.grid() plt.legend() plt.show()