import numpy as np
def get_N(dvc=10, delta=0.05, epsilon=0.05, initial_N=1000, tolerance = 1):
"""Uses recursion to iterate N until it converges within a tolerance
Args: dvc = VC dimension
delta = 1 - %confidence
epsilon = generalization error
initial_N = initial guess of sample size
tolerance = constraint at which to stop the recursion and state convergence
Returns: N = Number of samples required
"""
new_N = 8 / epsilon**2 * np.log((4 * ((2 * initial_N) ** dvc + 1)) / delta) # formula to generate new N
if abs(new_N - initial_N) < tolerance: # Did it converge within a specific tolerance?
return new_N
else: # If so return N
return get_N(dvc, delta, epsilon, new_N, tolerance) # If not iterate with new N
print("The closest numerical approximation of the minimum sample "\
"size that the VC generalization bound predicts is {}".format(int(get_N())))
import matplotlib.pyplot as plt
%matplotlib inline
N = np.linspace(start=3, stop=10000, dtype="float64")
dvc = 50
delta = 0.05
# need to take log of mH prior due to overflow error
# from Devroye bound since 4mH(N^2) --> overflow
# since mH(N) = N^dvc + 1
# log(mH(N)) is approximately dvc * log(N)
logmH = lambda constant, N, dvc: dvc * np.log(constant * N) # approximately log(mH)
epsilon_VC = lambda N, dvc, delta: np.sqrt(8 / N * (np.log(4) + logmH(2, N, dvc) - np.log(delta)))
epsilon_rademacher = lambda N, dvc, delta: (np.sqrt(2 / N * (np.log(2 * N) + logmH(1, N, dvc))) +
np.sqrt(2 / N * np.log(1 / delta)) +
1 / N)
epsilon_parr_and_vdb = lambda N, dvc, delta: (1 + np.sqrt(N * (np.log(6) + logmH(2, N, dvc) - np.log(delta)) + 1)) / N
epsilon_devroye = lambda N, dvc, delta: (2 +
np.sqrt(2 * N * (np.log(4) + logmH(1, N**2, dvc) - np.log(delta)) -
4 * (np.log(4) + logmH(1, N**2, dvc) - np.log(delta)) +
4)
) / (2 * (N - 2))
epsilons = [epsilon_VC(N, dvc, delta),
epsilon_rademacher(N, dvc, delta),
epsilon_parr_and_vdb(N, dvc, delta),
epsilon_devroye(N, dvc, delta)]
bounds = ["Original VC bound",
"Rademacher Penalty Bound",
"Parrondo and Van den Broek",
"Devroye"]
plt.title("$log(\epsilon)$ vs $N$")
plt.xlabel("$N$")
plt.ylabel("$log(\epsilon)$")
for i in range(4):
# take log of epsilon to zoom in!
plt.plot(N[-5:], np.log(epsilons[i][-5:]))
plt.legend(bounds,loc="best")
plt.show()
N = np.float(5)
epsilons = [epsilon_VC(N, dvc, delta),
epsilon_rademacher(N, dvc, delta),
epsilon_parr_and_vdb(N, dvc, delta),
epsilon_devroye(N, dvc, delta)]
bounds = ("Original VC bound",
"Rademacher Penalty Bound",
"Parrondo and Van den Broek",
"Devroye")
plt.title("$\epsilon$ at $N=5$")
plt.ylabel("$\epsilon$")
plt.xticks([0.3, 1.2, 2.2, 3.4], bounds, rotation=70)
plt.bar(np.arange(4), epsilons)
plt.show()
np.random.seed(seed=2)
x_range = np.linspace(-1, 1, 50)
x1 = np.random.uniform(-1, 1, 1)
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, 1)
y2 = np.sin(np.pi * x2)
b = np.mean(np.array([y1, y2]))
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.plot(x_range, np.sin(np.pi * x_range))
plt.plot(x1, y1, 'ro')
plt.plot(x2, y2, 'ro')
plt.axhline(b, color='green')
np.random.seed(seed=2)
x_range = np.linspace(-1, 1, 50)
x1 = np.random.uniform(-1, 1, 1)
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, 1)
y2 = np.sin(np.pi * x2)
a = (x1 * y1 + x2 * y2) / (x1 ** 2 + x2 ** 2)
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.plot(x_range, np.sin(np.pi * x_range))
plt.plot(x1, y1, 'ro')
plt.plot(x2, y2, 'ro')
plt.plot(x_range, a * x_range)
np.random.seed(seed=2)
x_range = np.linspace(-1, 1, 50)
x1 = np.random.uniform(-1, 1, 1)
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, 1)
y2 = np.sin(np.pi * x2)
a = (y2 - y1) / (x2 - x1)
b = y2 - a * x2
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.plot(x_range, np.sin(np.pi * x_range))
plt.plot(x1, y1, 'ro')
plt.plot(x2, y2, 'ro')
plt.plot(x_range, a * x_range + b)
np.random.seed(seed=2)
x_range = np.linspace(-1, 1, 50)
x1 = np.random.uniform(-1, 1, 1)
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, 1)
y2 = np.sin(np.pi * x2)
a = (x1**2 * y1 + x2**2 * y2) / (x1**4 + x2**4)
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.plot(x_range, np.sin(np.pi * x_range))
plt.plot(x1, y1, 'ro')
plt.plot(x2, y2, 'ro')
plt.plot(x_range, a * x_range ** 2)
np.random.seed(seed=2)
x_range = np.linspace(-1, 1, 50)
x1 = np.random.uniform(-1, 1, 1)
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, 1)
y2 = np.sin(np.pi * x2)
i = (x1**2 + x2**2)
j = (y1 + y2)
k = (x1**4 + x2**4)
m = (x1**2 * y1 + x2**2 * y2)
a = (2 * m - i * j) / (2 * k - i ** 2)
b = (j * k - i * m) / (2 * k - i ** 2)
plt.axhline(0, color='black')
plt.axvline(0, color='black')
plt.plot(x_range, np.sin(np.pi * x_range))
plt.plot(x1, y1, 'ro')
plt.plot(x2, y2, 'ro')
plt.plot(x_range, a * x_range ** 2 + b)
def get_a_hat_list(K = 10000):
"""Returns a list of a_hats based on the
number of random samples K
Args:
K: number of random samples
Returns:
a_hat_list: an array all the a_hats
"""
a_hat_list = np.zeros(K)
x1 = np.random.uniform(-1, 1, K) # random 1st point for K trials
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, K) # random 2nd point for K trials
y2 = np.sin(np.pi * x2)
a_hat_list = (x1 * y1 + x2 * y2) / (x1 ** 2 + x2 ** 2) # K a_hats
return a_hat_list
a_hat_list = get_a_hat_list() # save for a_bar, g_bar, bias and variance
a_bar = np.mean(a_hat_list)
print("g_bar(x) = {}x".format(round(a_bar, 2)))
x_range = np.linspace(-1, 1, 1000)
bias = np.mean((1.43 * x_range - np.sin(np.pi * x_range))**2)
print("bias = {}".format(np.round(bias, 2)))
variance = np.mean((np.outer(a_hat_list, x_range) - a_bar * x_range)**2)
print("variance = {}".format(round(variance, 2)))
def get_g_list(D = 100000):
x1 = np.random.uniform(-1, 1, D)
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, D)
y2 = np.sin(np.pi * x2)
g_list = (y2 - y1) / (x2 - x1)
int_list = y2 - g_list * x2
return g_list, int_list
g_list, int_list = get_g_list()
a = np.mean(g_list)
b = np.mean(int_list)
print("a = {}".format(a))
print("b = {}".format(b))
plt.ylim((-4,4))
plt.plot(x_range, np.sin(np.pi * x_range))
plt.plot(x_range, a * x_range + b)
def get_b_hat_list(K = 10000):
"""Returns a list of a_hats based on the
number of random samples K
Args:
K: number of random samples
Returns:
a_hat_list: an array all the a_hats
"""
b_hat_list = np.zeros(K)
x1 = np.random.uniform(-1, 1, K) # random 1st point for K trials
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, K) # random 2nd point for K trials
y2 = np.sin(np.pi * x2)
b_hat_list = (y2 + y1) / 2 # K b_hats
return b_hat_list
b_hat_list = get_b_hat_list()
b_bar = np.mean(b_hat_list)
print("g_bar(x) = {}".format(round(b_bar, 2)))
x_range = np.linspace(-1, 1, 1000)
bias = np.mean((b_bar - np.sin(np.pi * x_range))**2)
print("bias = {}".format(np.round(bias, 2)))
variance = np.mean((np.outer(b_hat_list, np.ones(len(b_hat_list))) - b_bar)**2)
print("variance = {}".format(np.round(variance, 2)))
print("E_out = bias + variance = {} + {} = {}".format(np.round(bias, 2),
np.round(variance, 2),
np.round(bias + variance, 2)))
def get_a_hat_list(K = 100000):
"""Returns a list of a_hats based on the
number of random samples K
Args:
K: number of random samples
Returns:
a_hat_list: an array all the a_hats
"""
a_hat_list = np.zeros(K)
x1 = np.random.uniform(-1, 1, K) # random 1st point for K trials
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, K) # random 2nd point for K trials
y2 = np.sin(np.pi * x2)
a_hat_list = (x1 * y1 + x2 * y2) / (x1 ** 2 + x2 ** 2) # K a_hats
return a_hat_list
a_hat_list = get_a_hat_list()
a_bar = np.mean(a_hat_list)
print("g_bar(x) = {}x".format(round(a_bar, 2)))
x_range = np.linspace(-1, 1, 1000)
bias = np.mean((a_bar*x_range - np.sin(np.pi * x_range))**2)
print("bias = {}".format(np.round(bias, 2)))
variance = np.mean((np.outer(a_hat_list, x_range) - a_bar * x_range)**2)
print("variance = {}".format(np.round(variance, 2)))
print("E_out = bias + variance = {} + {} = {}".format(np.round(bias, 2),
np.round(variance, 2),
np.round(bias + variance, 2)))
def get_a_hat_list_and_b_hat_list(K = 100000):
"""Returns a list of a_hats based on the
number of random samples K
Args:
K: number of random samples
Returns:
a_hat_list: an array all the a_hats
b_hat_list: an array all the b_hats
"""
a_hat_list = np.zeros(K)
b_hat_list = np.zeros(K)
x1 = np.random.uniform(-1, 1, K) # random 1st point for K trials
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, K) # random 2nd point for K trials
y2 = np.sin(np.pi * x2)
a_hat_list = (y2 - y1) / (x2 - x1) # K a_hats
b_hat_list = y2 - a_hat_list * x2 # K b_hats
return (a_hat_list, b_hat_list)
a_hat_list, b_hat_list = get_a_hat_list_and_b_hat_list()
a_bar = np.mean(a_hat_list)
b_bar = np.mean(b_hat_list)
print("g_bar(x) = {}x + {}".format(round(a_bar, 2), round(b_bar, 2)))
x_range = np.linspace(-1, 1, 1000)
bias = np.mean((a_bar*x_range + b_bar - np.sin(np.pi * x_range))**2)
print("bias = {}".format(np.round(bias, 2)))
variance = np.mean(((np.outer(a_hat_list, x_range) + np.expand_dims(b_hat_list, 1)) - (a_bar * x_range + b_bar))**2)
print("variance = {}".format(np.round(variance, 2)))
print("E_out = bias + variance = {} + {} = {}".format(np.round(bias, 2),
np.round(variance, 2),
np.round(bias + variance, 2)))
def get_a_hat_list(K = 100000):
"""Returns a list of a_hats based on the
number of random samples K
Args:
K: number of random samples
Returns:
a_hat_list: an array all the a_hats
"""
a_hat_list = np.zeros(K)
x1 = np.random.uniform(-1, 1, K) # random 1st point for K trials
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, K) # random 2nd point for K trials
y2 = np.sin(np.pi * x2)
a_hat_list = (x1**2 * y1 + x2**2 * y2) / (x1**4 + x2**4) # K a_hats
return a_hat_list
a_hat_list = get_a_hat_list()
a_bar = np.mean(a_hat_list)
print("g_bar(x) = {}x^2".format(round(a_bar, 2)))
x_range = np.linspace(-1, 1, 200)
bias = np.mean((a_bar*x_range**2 - np.sin(np.pi * x_range))**2)
print("bias = {}".format(np.round(bias, 2)))
variance = np.mean((np.outer(a_hat_list, x_range**2) - a_bar * x_range**2)**2)
print("variance = {}".format(np.round(variance, 2)))
print("E_out = bias + variance = {} + {} = {}".format(np.round(bias, 2),
np.round(variance, 2),
np.round(bias + variance, 2)))
def get_a_hat_list_and_b_hat_list(K = 100000):
"""Returns a list of a_hats based on the
number of random samples K
Args:
K: number of random samples
Returns:
a_hat_list: an array all the a_hats
b_hat_list: an array all the b_hats
"""
a_hat_list = np.zeros(K)
b_hat_list = np.zeros(K)
x1 = np.random.uniform(-1, 1, K) # random 1st point for K trials
y1 = np.sin(np.pi * x1)
x2 = np.random.uniform(-1, 1, K) # random 2nd point for K trials
y2 = np.sin(np.pi * x2)
i = (x1**2 + x2**2)
j = (y1 + y2)
k = (x1**4 + x2**4)
m = (x1**2 * y1 + x2**2 * y2)
a_hat_list = (2 * m - i * j) / (2 * k - i ** 2)
b_hat_list = (j * k - i * m) / (2 * k - i ** 2)
return (a_hat_list, b_hat_list)
a_hat_list, b_hat_list = get_a_hat_list_and_b_hat_list()
a_bar = np.mean(a_hat_list)
b_bar = np.mean(b_hat_list)
print("g_bar(x) = {}x^2 + {}".format(round(a_bar, 2), round(b_bar, 2)))
x_range = np.linspace(-1, 1, 1000)
bias = np.mean((a_bar*x_range**2 + b_bar - np.sin(np.pi * x_range))**2)
print("bias = {}".format(np.round(bias, 2)))
variance = np.mean(((np.outer(a_hat_list, x_range**2) + np.expand_dims(b_hat_list, 1)) -
(a_bar * x_range**2 + b_bar))**2)
print("variance = {}".format(np.round(variance, 2)))
print("E_out = bias + variance = {} + {} = {}".format(np.round(bias, 2),
np.round(variance, 2),
np.round(bias + variance, 2)))
from scipy.misc import comb
def make_mH_column(q, maxN=1000):
"""Creates a column of mHs based on q and N"""
mH_list = np.zeros(maxN)
mH = 2
for N in range(1, maxN+1):
mH = 2 * mH - comb(N, q)
mH_list[N-1] = mH
return mH_list
# make q columns
q_1_column = np.log(make_mH_column(q=1))
q_2_column = np.log(make_mH_column(q=2))
q_3_column = np.log(make_mH_column(q=3))
q_4_column = np.log(make_mH_column(q=4))
q_5_column = np.log(make_mH_column(q=5))
q_list = [q_1_column, q_2_column, q_3_column, q_4_column, q_5_column]
# make dvc columns
dvc_1 = np.log(np.arange(2,1002)**1)
dvc_2 = np.log(np.arange(2,1002)**2)
dvc_3 = np.log(np.arange(2,1002)**3)
dvc_4 = np.log(np.arange(2,1002)**4)
dvc_5 = np.log(np.arange(2,1002)**5)
dvc_list = [dvc_1, dvc_2, dvc_3, dvc_4, dvc_5]
title = "$mH$ bounds for $q = {}$"
q = 1
plt.title(title.format(q))
plt.plot(np.arange(1,1001), q_list[q-1], label="$q = {}$".format(q))
for dvc in np.arange(1,6):
plt.plot(np.arange(1,1001), dvc_list[dvc-1], label="$N^{}$".format(dvc))
plt.legend(loc="best")
q = 2
plt.title(title.format(q))
plt.plot(np.arange(1,1001), q_list[q-1], label="$q = {}$".format(q))
for dvc in np.arange(1,6):
plt.plot(np.arange(1,1001), dvc_list[dvc-1], label="$N^{}$".format(dvc))
plt.legend(loc="best")
q = 3
plt.title(title.format(q))
plt.plot(np.arange(1,1001), q_list[q-1], label="$q = {}$".format(q))
for dvc in np.arange(1,6):
plt.plot(np.arange(1,1001), dvc_list[dvc-1], label="$N^{}$".format(dvc))
plt.legend(loc="best")
q = 4
plt.title(title.format(q))
plt.plot(np.arange(1,1001), q_list[q-1], label="$q = {}$".format(q))
for dvc in np.arange(1,6):
plt.plot(np.arange(1,1001), dvc_list[dvc-1], label="$N^{}$".format(dvc))
plt.legend(loc="best")
q = 5
plt.title(title.format(q))
plt.plot(np.arange(1,1001), q_list[q-1], label="$q = {}$".format(q))
for dvc in np.arange(1,6):
plt.plot(np.arange(1,1001), dvc_list[dvc-1], label="$N^{}$".format(dvc))
plt.legend(loc="best")