This was initially proposed in https://arxiv.org/pdf/1905.07047.pdf as a classical competitor to QAOA for MaxCut.
It works on $D$-regular, $N$-node graphs as follows: start from a uniformly random spin assignment, then in one step every node flips its spin if at least $T$ of its neighbors currently agree with it (this is the threshold update implemented by $q$ in the code below).
Hastings defines performance $-0.5 \le \delta \le 0.5$ compared to a random partitioning of equal size, which on average would cut half the edges: $$ K = (1/2 + \delta)(DN/2) $$
Optimizing threshold T for each D almost always outperforms 1-step QAOA on D-regular triangle-free graphs, but with the same scaling $\delta \propto D^{-0.5}$.
This was completed on July 7 2020.
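As a quick sanity check of the setup, here is a minimal Monte Carlo sketch of one step of the threshold algorithm (my own illustration, not code from the paper; it assumes networkx is available for random regular graphs):
import networkx as nx
import numpy as np
def one_step_cut_fraction(D, N, T, trials=20):
    """Average cut fraction after one synchronous threshold step on random D-regular graphs."""
    fracs = []
    for _ in range(trials):
        G = nx.random_regular_graph(D, N)
        spins = np.random.choice([-1, 1], size=N)
        # synchronous update: a node flips iff at least T of its neighbors currently agree with it
        agree = np.array([sum(spins[u] == spins[v] for u in G[v]) for v in range(N)])
        new_spins = np.where(agree >= T, -spins, spins)
        cut = sum(new_spins[u] != new_spins[v] for u, v in G.edges)
        fracs.append(cut / G.number_of_edges())
    return np.mean(fracs)
# delta estimate; random D-regular graphs are not triangle-free, so this should
# only roughly match calc_performance(6, 5) below
print(one_step_cut_fraction(6, 1000, 5) - 0.5)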
Hastings analyzes the subgraph of nodes $i$ and $j$ and their other neighbors. He defines the threshold function $q(k)$, equal to $-1$ if $k \ge T$ (the node switches) and $+1$ otherwise, and $P_n(k) = \binom{n}{k} 2^{-n}$, the probability that exactly $k$ of $n$ uniformly random neighbors agree.
Hastings computes the correlation function $\langle Z_i Z_j \rangle$, where $Z_l$ is the spin $\pm 1$ of node $l$. $\langle Z_i Z_j \rangle$ is the probability the nodes have the same spin ($+1$) minus the probability they have opposite spin ($-1$). Thus, $1/2(1 - \langle Z_i Z_j \rangle)$ is the probability the edge is cut: it is 1 if the nodes always have opposite spin and 0 if they always have the same spin.
Since all edge subgraphs are identical, one can use linearity of expectation to count the expected number of cut edges: $$ K = (DN/2)(1/2)(1 - \langle Z_i Z_j \rangle) $$
So, the performance $\delta = (-1/2)\langle Z_i Z_j \rangle$.
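Explicitly, equating the two expressions for $K$: $$ \Big(\frac{1}{2} + \delta\Big)\frac{DN}{2} = \frac{DN}{2} \cdot \frac{1}{2}\big(1 - \langle Z_i Z_j \rangle\big) \implies \delta = -\frac{1}{2}\langle Z_i Z_j \rangle $$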
Hastings conducts the analysis for triangle-free graphs. Initially, nodes $i$ and $j$ agree in spin with probability 1/2. The formula below shows how $Z_i$ and $Z_j$ update.
So, we can compute $\langle Z_i Z_j \rangle$: $$ \langle Z_i Z_j \rangle = \frac{1}{2} \left(\sum_{k=0}^{D-1} P_{D-1}(k)\, q(k+1)\right)^2 - \frac{1}{2} \left(\sum_{k=0}^{D-1} P_{D-1}(k)\, q(k)\right)^2 $$
import numpy as np
import matplotlib.pyplot as plt
from scipy.special import binom
def qsimple(k, threshold):
return -1 if k >= threshold else 1
def q(k, threshold):
"""Like qsimple above, but also works for numpy arrays"""
return (1-(k >= threshold)*2)
def test_qs():
"""Test q and qsimple methods"""
assert q(2, 3) == 1 == qsimple(2, 3), "testing q less than threshold"
assert q(3, 3) == -1 == qsimple(3, 3), "testing q equal to threshold"
assert q(3, 2) == -1 == qsimple(3, 2), "testing q higher than threshold"
assert np.all(q(np.array([2,3,4]), 3) == np.array([1, -1, -1])), "list of q"
assert np.all(q(np.array([2,3,4]), 3) == [qsimple(2, 3), qsimple(3,3), qsimple(4,3)]), "list of q"
test_qs()
def calc_performance(D, T):
"""
This calculates the performance over random after 1 step of Hastings' thresholding algorithm.
Precisely, returns delta = -1/2 <Z_i Z_j> for D-regular graphs with threshold T.
"""
s1 = 0
s2 = 0
# go through # of agreeing neighbors, from 0 through D-1
for i in range(D):
# factor out two copies of 2^-n, n=D-1
s1 += binom(D-1, i)*q(i+1, T)
s2 += binom(D-1, i)*q(i, T)
return -0.5 * 0.5 * (s1**2 - s2**2) * 2**(-2*(D-1))
def test_all_thresholds(D, fn):
"""
Gets performance over random for all possible thresholds (from 0 through D+1).
Returns all results.
"""
return [fn(D, T) for T in range(D+2)]
def print_max(D, results):
"""Finds and prints out the maximum performance in the input list."""
print("D =", D, "T =", results.index(max(results)), "performance =", max(results))
def plot_by_T(D, results, label_prefix="", scaled=False):
plt.plot(np.arange(len(results))/(D if scaled else 1), results, label=label_prefix + "D=" + str(D))
plt.xlabel("min threshold to switch")
plt.ylabel("performance $\delta$")
calc_performance(9, 9/2)
-0.0
def calc_performance_qf(D, T, qf):
"""
This calculates the performance over random after 1 step of Hastings' thresholding algorithm.
Precisely, returns delta = -1/2 <Z_i Z_j> for D-regular graphs with threshold T.
"""
s1 = 0
s2 = 0
# go through # of agreeing neighbors, from 0 through D-1
for i in range(D):
# factor out two copies of 2^-n, n=D-1
s1 += binom(D-1, i)*qf(i+1, T)
s2 += binom(D-1, i)*qf(i, T)
return -0.5 * 0.5 * (s1**2 - s2**2) * 2**(-2*(D-1))
def test_all_thresholds_qf(D, fn, qf):
"""
Gets performance over random for all possible thresholds (from 0 through D+1).
Returns all results.
"""
return [fn(D, T, qf) for T in range(D+2)]
def make_q2(s):
return lambda k, t: 1*(k <= t-1) + s*(k == t) + s*(k > t)
D = 60
inps = np.linspace(-0.8, 1, 5)
outs = []
for qf in [q] + [make_q2(s) for s in inps]:
results = test_all_thresholds_qf(D, calc_performance_qf, qf)
outs.append(max(results)*D)
print_max(D, results)
plot_by_T(D, D**0.5*np.array(results), qf.__name__)
plt.legend()
# plt.plot(inps, outs[1:])
D = 60 T = 34 performance = 0.04353484029031557
D = 60 T = 34 performance = 0.041446112492853224
D = 60 T = 33 performance = 0.03682291744378499
D = 60 T = 32 performance = 0.02960143695036295
D = 60 T = 31 performance = 0.018419759423244197
D = 60 T = 0 performance = -0.0
D = 299
inps = np.linspace(-1, 1, 5)
outs = []
for qf in [q] + [make_q2(s) for s in inps]:
results = test_all_thresholds_qf(D, calc_performance_qf, qf)
outs.append(max(results)*D**0.5)
# print_max(D, results)
# plot_by_T(D, results, qf.__name__)
# plt.legend()
plt.plot(inps, outs[1:])
This plots performance by chosen threshold. The maximum value should match Table 1 in Hastings' paper. Notice that performance vs T is a shifted odd function.
for D in range(2, 20):
results = test_all_thresholds(D, calc_performance)
print_max(D, results)
if D % 3 == 0:
plot_by_T(D, results)
plt.legend()
D = 2 T = 2 performance = 0.25
D = 3 T = 3 performance = 0.1875
D = 4 T = 3 performance = 0.140625
D = 5 T = 4 performance = 0.15625
D = 6 T = 5 performance = 0.1220703125
D = 7 T = 5 performance = 0.128173828125
D = 8 T = 6 performance = 0.11663818359375
D = 9 T = 6 performance = 0.107666015625
D = 10 T = 7 performance = 0.107666015625
D = 11 T = 7 performance = 0.09252548217773438
D = 12 T = 8 performance = 0.0986623764038086
D = 13 T = 9 performance = 0.08860141038894653
D = 14 T = 9 performance = 0.09050001204013824
D = 15 T = 10 performance = 0.0853198766708374
D = 16 T = 10 performance = 0.08332019206136465
D = 17 T = 11 performance = 0.08158713206648827
D = 18 T = 11 performance = 0.07705451361835003
D = 19 T = 12 performance = 0.07778230123221874
Derivation at https://www.overleaf.com/read/thgyswvpgmwt
def calc_performance_one_triangle(D, T):
"""
This calculates the performance over random
after 1 step of Hastings' thresholding algorithm on a subgraph including 1 triangle.
Precisely, returns delta = -1/2 <Z_i Z_j> for D-regular graphs with threshold T.
"""
s1 = 0
s2 = 0
s3 = 0
# go through # of agreeing neighbors, from 0 through D-2
for i in range(D-1):
# factor out two copies of 2^-n, n=D-2
s1 += binom(D-2, i)*q(i+2, T)
s2 += binom(D-2, i)*q(i+1, T)
s3 += binom(D-2, i)*q(i, T)
return -0.5 * 0.25 * (s1**2 + s2**2 - 2*s2*s3) * 2**(-2*(D-2))
Notice that the maximum performance is slightly reduced when adding one triangle.
Perhaps it is because $j$ is slightly more likely to be "in sync" with $i$; i.e. they will share switch/stay behavior if they share many neighbors.
Interestingly, the minimum performance is also slightly reduced. (Why?)
D=10
results = test_all_thresholds(D, calc_performance)
print_max(D, results)
plot_by_T(D, results, "triangle-free, ")
results = test_all_thresholds(D, calc_performance_one_triangle)
print_max(D, results)
plot_by_T(D, results, "one triangle, ")
plt.legend()
D = 10 T = 7 performance = 0.107666015625
D = 10 T = 7 performance = 0.09271240234375
Derivation at https://www.overleaf.com/read/thgyswvpgmwt
def calc_performance_y_triangles(D, T, Y):
"""
This calculates the performance over random
after 1 step of Hastings' thresholding algorithm on a subgraph including Y triangles.
Precisely, returns delta = -1/2 <Z_i Z_j> for D-regular graphs with threshold T.
"""
p_k_vec = binom(D-1-Y, range(D-Y))
out = 0
for l in range(0, Y+1):
q_kl1_vec = q(np.array(range(l+1, D-Y + l+1)), T)
q_kl_vec = q(np.array(range(l, D-Y + l)), T)
q_kyl_vec = q(np.array(range(Y-l, D-Y + Y-l)), T)
f_dyl = sum(p_k_vec * q_kl1_vec)**2 - sum(p_k_vec * q_kl_vec)*sum(p_k_vec * q_kyl_vec)
out += (f_dyl*binom(Y, l))
# add powers of 2 at the end
return -0.5 * 0.5 * out * 2**(-Y) * 2**((-2*(D-1-Y)))
def test_y_triangles_formula():
"""Tests Y triangle formula against 0 and 1 triangle formula."""
assert calc_performance(5, 2) == calc_performance_y_triangles(5, 2, 0)
assert calc_performance(5, 5) == calc_performance_y_triangles(5, 5, 0)
assert calc_performance_one_triangle(5, 2) == calc_performance_y_triangles(5, 2, 1)
test_y_triangles_formula()
def calc_performance_y_triangles_curried(Y):
"""Creates a 2-argument function given number of triangles Y."""
return lambda d, t: calc_performance_y_triangles(d, t, Y)
This confirms that the triangle-free results match the general results for Y=0.
D=8
results = test_all_thresholds(D, calc_performance)
print_max(D, results)
plot_by_T(D, results, "triangle-free, ")
results = test_all_thresholds(D, calc_performance_y_triangles_curried(0))
print_max(D, results)
plot_by_T(D, results, "Y=0, ")
plt.legend()
D = 8 T = 6 performance = 0.11663818359375
D = 8 T = 6 performance = 0.11663818359375
This confirms that the one-triangle results match the general results for Y=1.
D=8
results = test_all_thresholds(D, calc_performance_one_triangle)
print_max(D, results)
plot_by_T(D, results, "one triangle, ")
results = test_all_thresholds(D, calc_performance_y_triangles_curried(1))
print_max(D, results)
plot_by_T(D, results, "Y=1, ")
plt.legend()
D = 8 T = 6 performance = 0.1007080078125
D = 8 T = 6 performance = 0.1007080078125
The more triangles shared by $i$ and $j$, the worse the performance. I think it is because the spin updates of $i$ and $j$ are more influenced by the shared nodes than by the edge $E_{ij}$.
D=12
# from 0 through D-1 possible triangles
ys_to_plot = range(D)
for (results, y) in [(test_all_thresholds(D, calc_performance_y_triangles_curried(y)), y) for y in ys_to_plot]:
print_max(D, results)
if y % 2 == 0:
plot_by_T(D, results, "Y=" + str(y) + ", ")
plt.legend()
D = 12 T = 8 performance = 0.0986623764038086
D = 12 T = 8 performance = 0.08471488952636719
D = 12 T = 8 performance = 0.07089614868164062
D = 12 T = 8 performance = 0.0571441650390625
D = 12 T = 9 performance = 0.05065155029296875
D = 12 T = 9 performance = 0.044830322265625
D = 12 T = 9 performance = 0.03826904296875
D = 12 T = 9 performance = 0.03076171875
D = 12 T = 9 performance = 0.02197265625
D = 12 T = 9 performance = 0.01123046875
D = 12 T = 10 performance = 0.005126953125
D = 12 T = 0 performance = -0.0
Testing: what about $Y \propto \sqrt{D}$? (the code below uses $Y = 4\sqrt{D}$)
# seems like O(1/\sqrt{D}) negative advantage.
Ds = np.array([5, 10, 20, 100, 300, 400, 500])
for D in Ds:
D = int(D)
y = int(4*D**0.5)
print(D, y)
# results = test_all_thresholds(D, calc_performance_y_triangles_curried(y))
results = [calc_performance_y_triangles_curried(y)(D, T) for T in range(int(0.4*D), int(0.6*D))]
    print_max(D, results)  # note: this prints the index within the restricted T range above, not the absolute threshold
# kunal testing 2020-11-13
plt.plot((np.arange(len(results))/D - 0.1)*D**0.5, D**0.5*np.array(results), label="Y=" + str(y))
# plot_by_T(D, D**0.5*np.array(results), "Y=" + str(y) + ", ", scaled=True)
plt.title("performance * $\sqrt{D}$")
plt.legend()
plt.grid()
plt.xlabel('for now; k')
plt.ylim(0, 0.4)
5 8
D = 5 T = 0 performance = -0.0
10 12
D = 10 T = 0 performance = -0.0
20 17
D = 20 T = 0 performance = -0.15308570861816406
100 40
D = 100 T = 19 performance = 0.008213472052512473
300 69
D = 300 T = 45 performance = 0.005489552896813169
400 80
D = 400 T = 57 performance = 0.004801671222457815
500 89
D = 500 T = 69 performance = 0.004359136997572702
Testing: what about Y = D/2?
# seems like O(1) (i.e. -0.25) negative advantage.
Ds = np.array([5, 10, 20, 100, 300])
for D in Ds:
D = int(D)
y = int(D/2)
print(D, y)
results = test_all_thresholds(D, calc_performance_y_triangles_curried(y))
print_max(D, results)
plot_by_T(D, np.array(results), "Y=" + str(y) + ", ", scaled=True)
# plt.title("performance * $\sqrt{D}$")
plt.legend()
5 2
D = 5 T = 4 performance = 0.078125
10 5
D = 10 T = 8 performance = 0.0390625
20 10
D = 20 T = 14 performance = 0.02521481364965439
100 50
D = 100 T = 60 performance = 0.005552559437854753
300 150
D = 300 T = 169 performance = 0.001612017055228795
Testing: What about $Y = D^{2/3}$?
# seems like O(D^(-1/3)) negative advantage.
Ds = np.array([5, 10, 20, 100, 300, 500])
for D in Ds:
D = int(D)
y = int(D**(2/3))
print(D, y)
results = test_all_thresholds(D, calc_performance_y_triangles_curried(y))
print_max(D, results)
plot_by_T(D, D**(1/3)*np.array(results), "Y=" + str(y) + ", ", scaled=True)
plt.title("performance * $D^{1/3}$")
plt.legend()
5 2
D = 5 T = 4 performance = 0.078125
10 4
D = 10 T = 7 performance = 0.0472412109375
20 7
D = 20 T = 13 performance = 0.0350851034745574
100 21
D = 100 T = 58 performance = 0.014875157802169489
300 44
D = 300 T = 163 performance = 0.00788104941884438
500 62
D = 500 T = 267 performance = 0.005779483614400826
Testing: What about $Y = D^{1/3}$? (performance scaled by $\sqrt{D}$ in the plot)
# seems like O(1) (i.e. -0.25) negative advantage.
Ds = np.array([5, 10, 20, 100, 300])
for D in Ds:
D = int(D)
y = int(D**(1/3))
print(D, y)
results = test_all_thresholds(D, calc_performance_y_triangles_curried(y))
print_max(D, results)
plot_by_T(D, D**(1/2)*np.array(results), "Y=" + str(y) + ", ", scaled=True)
# plt.title("performance * $\sqrt{D}$")
plt.legend()
5 1
D = 5 T = 4 performance = 0.1171875
10 2
D = 10 T = 7 performance = 0.0777587890625
20 2
D = 20 T = 13 performance = 0.06089535425417125
100 4
D = 100 T = 55 performance = 0.027958005378125067
300 6
D = 300 T = 159 performance = 0.01674118429821104
The maximum performance decreases when adding triangles for all D.
for y in range(10):
ds = range(2+y, 50)
fn = calc_performance_y_triangles_curried(y)
deltas = [max(test_all_thresholds(d, fn)) for d in ds]
plt.plot(ds, deltas, label=str(y) + " triangles")
plt.title("Performance vs D")
plt.xlabel("D")
plt.ylabel("performance $\delta$")
plt.legend()
plt.grid(True)
for y in range(0, 10, 2):
ds = range(2+y, 200)
fn = calc_performance_y_triangles_curried(y)
cs = [d**0.5*max(test_all_thresholds(d, fn)) for d in ds]
plt.plot(ds, cs, label=str(y) + " triangles")
plt.title("Scaled performance vs D (might tend towards constant $C = \delta \sqrt{D}$)")
plt.xlabel("D")
plt.ylabel("$\delta \sqrt{D}$")
plt.legend()
plt.grid(True)
for y in range(0, 30, 5):
ds = range(2+y, 90)
fn = calc_performance_y_triangles_curried(y)
deltas = [max(test_all_thresholds(d, fn)) for d in ds]
plt.loglog(ds, deltas, label=str(y) + " triangles")
plt.title("Delta vs D (might be a power law relationship)")
plt.xlabel("D")
plt.ylabel("performance $\delta$")
plt.legend()
plt.grid(True)
## from qaoa repo
from numpy import sin, cos, linspace, pi, e, tan
import numpy as np
from scipy.optimize import minimize
def delta_triangles(beta, gamma, D, Y):
"""This calculates delta, given a D-regular graph with Y triangles related to each edge."""
t1 = 0.5*sin(4*beta)*sin(gamma)*(cos(gamma)**(D-1))
t2 = 0.25*(sin(2*beta)**2)*(cos(gamma)**(2*(D-1-Y)))*(1-(cos(2*gamma)**Y))
return t1 - t2
opt_both = lambda x, d, y: -delta_triangles(x[0], x[1], d, y)
def get_max_opt(D, Y):
return -minimize(opt_both, (0,0), method='Nelder-Mead', args=(D,Y)).fun
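As a sanity check of this formula (my addition): for $D=3$, $Y=0$ the optimum should be $1/(3\sqrt{3}) \approx 0.19245$, the known 1-step QAOA value on triangle-free 3-regular graphs.
print(get_max_opt(3, 0))  # expect ~0.19245 = 1/(3*sqrt(3))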
for Y in range(0, 30, 3):
inputs = range(2+Y, 200)
plt.plot(inputs, [get_max_opt(D, Y)*(D**0.5) for D in inputs], label=str(Y) + " triangles")
plt.legend()
plt.title("Scaled performance vs D (might tend towards constant $C = \delta \sqrt{D}$)")
plt.xlabel("D")
plt.ylabel("$\delta \sqrt{D}$")
plt.legend()
plt.grid(True)
comparison
(np.log(120/45)/np.log(2))**2
2.0023311243653237
2**(2**0.5)
2.665144142690225
5 -> 45
10 -> 120
20 -> 400
0.75
np.log(2)
0.6931471805599453
Y=20
ds = range(2+Y, 20*Y, 10)
fn = calc_performance_y_triangles_curried(Y)
cs = [d**0.5*max(test_all_thresholds(d, fn)) for d in ds]
plt.plot(ds, cs, label="classical")
plt.plot(ds, [get_max_opt(D, Y)*(D**0.5) for D in ds], label="qaoa")
plt.title("Scaled performance vs D (with Y=" + str(Y) + " triangles)")
plt.xlabel("D")
plt.ylabel("$\delta \sqrt{D}$")
plt.legend()
plt.grid(True)
what about high y?
y=d-1 --> 0; y=d/2 --> ~0.55/D
0.22/0.17
1.2941176470588234
0.17/0.13
1.3076923076923077
alpha=0.9
ds = range(2, 200, 4)
cs = [d**2*max(
test_all_thresholds(d, calc_performance_y_triangles_curried(int(d*alpha)))
) for d in ds]
plt.plot(ds, cs, label="classical")
plt.grid()
A full derivation is at https://www.overleaf.com/read/thgyswvpgmwt
Putting it all together, $$ \langle Z_i Z_j \rangle_2 = \sum_{k=0}^n \sum_{u=0}^n P_n(k) P_n(u)\, \frac{1}{2}\Big(Z_{i2,k} Z_{j2,u}\Big\rvert_{Z_{i1} = q(k+1),\, Z_{j1} = q(u+1),\, Z_{i0}=Z_{j0} = 1} + Z_{i2,k} Z_{j2,u}\Big\rvert_{Z_{i1} = q(k),\, Z_{j1} = -q(u),\, Z_{i0}=1,\, Z_{j0} = -1}\Big) $$
Assume the threshold is the same each time. This could be made different later.
import functools
def make_qfn(T):
return lambda x: (x < T)*2 - 1
# return lambda x: 1 if x < T else -1
qfns = [make_qfn(T) for T in range(10000)]
import scipy.stats
# %%timeit 100us
#calculate binomial probability
scipy.stats.binom.pmf(k=10, n=12, p=0.6)
114 µs ± 4.28 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# %%timeit 1us
binompmf(12, 10, 0.6)  # the lru_cache version defined below
0.06385228185599999
@functools.lru_cache(maxsize=4096)
def Z1plus(T, n):
qf = qfns[T]
return 2**(-n)*sum([binom(n, m)*qf(m+1) for m in range(0, n+1)])
@functools.lru_cache(maxsize=4096)
def Z1minus(T, n):
qf = qfns[T]
return (-1)*2**(-n)*sum([binom(n, m)*qf(m) for m in range(0, n+1)])
@functools.lru_cache(maxsize=4096)
def H(T, n, k,l,r):
p_plus = (1 + Z1plus(T, n))/2
p_minus = (1 + Z1minus(T, n))/2
return binompmf(k, l, p_plus) * binompmf(n-k, r, p_minus)
@functools.lru_cache(maxsize=None)
def binompmf(tot, times, p):
return p**times * (1 - p)**(tot-times) * binom(tot, times)
@functools.lru_cache(maxsize=None)
def bigQ(T, n, l, r, a, b):
qf = qfns[T]
return qf((n-l-r)*(1-a)/2 + (l+r)*(1+a)/2 + (1+b)/2)
# optimized version
# @functools.lru_cache(maxsize=None)
# def Z2(Z1, Z0, match, T1, T2, n, k):
# f = lambda lr: H(T1, n, k, lr[0], lr[1])*bigQ(T2, n, lr[0], lr[1], Z1*Z0, match)
# return Z1*np.sum( f(np.stack(
# np.meshgrid(np.arange(k+1), np.arange(n-k+1)),
# -1).reshape(-1, 2).T))
@functools.lru_cache(maxsize=None)
def Z2(Z1, Z0, match, T1, T2, n, k):
out = 0
for l in range(k+1):
for r in range(n-k+1):
out += H(T1, n, k, l, r)*bigQ(T2, n, l, r, Z1*Z0, match)
return Z1*out
# @functools.lru_cache(maxsize=4096)
def final(n, T1, T2):
"""
Calculates -1/2 * <Z_i Z_j>
"""
out = 0
q1 = qfns[T1]
for k in range(n+1):
for u in range(n+1):
equal = Z2(q1(k+1), 1, q1(k+1)*q1(u+1), T1, T2, n, k) * Z2(q1(u+1), 1, q1(k+1)*q1(u+1), T1, T2, n, k)
unequal = Z2(q1(k), 1, -q1(k)*q1(u), T1, T2, n, k) * Z2(-q1(u), -1, -q1(k)*q1(u), T1, T2, n, k)
out += binom(n, k) * binom(n, u) * 0.5 * (equal + unequal)
return -0.5 * 2**(-2*n) * out
%load_ext line_profiler
The line_profiler extension is already loaded. To reload it, use: %reload_ext line_profiler
It should be an odd function about the midpoint. Both $T = 0$ and $T > n+1$ should give 0 (i.e., no improvement).
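A quick check of both claims (my addition, using final as defined above):
for n in [5, 10]:
    assert abs(final(n, 0, 0)) < 1e-12, "T=0 flips every node; no improvement"
    assert abs(final(n, n + 2, n + 2)) < 1e-12, "T > n+1 flips no node; no improvement"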
import time
# ! pip install line-profiler
# %load_ext line_profiler
# %lprun -f final final(70, makeq(3), makeq(3))
%%time
# recall that n = D-1
D=30
xs = range(D+2)
plt.scatter(xs, [final(D-1, t, t) for t in xs])
plt.plot(xs, [0]*len(xs))
CPU times: user 1.07 s, sys: 8.02 ms, total: 1.08 s Wall time: 1.08 s
def get_max_final(d):
return max([final(d-1, t, t) for t in range(d+2)])
get_max_final(3)
0.191162109375
%lprun -f Z2 get_max_final(20)
/home/kunal/anaconda3/lib/python3.7/site-packages/line_profiler/line_profiler.py:328: UserWarning: Could not extract a code object for the object <functools._lru_cache_wrapper object at 0x7f20c4b6bb90>
  profile = LineProfiler(*funcs)
Timer unit: 1e-06 s
%time get_max_final(60)
CPU times: user 15.2 s, sys: 64 ms, total: 15.3 s Wall time: 15.3 s
0.05319450252407808
Z2.cache_info()
CacheInfo(hits=2910809, misses=44635, maxsize=None, currsize=44635)
%prun get_max_final(62)
6766289 function calls in 16.286 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    19529   10.069    0.001   14.655    0.001 <ipython-input-438-6c0bca817459>:9(Z2)
  2666496    3.864    0.000    4.330    0.000 <ipython-input-425-f80e789bd32f>:11(H)
       64    1.395    0.022   16.285    0.254 <ipython-input-439-ed2194217842>:2(final)
   142938    0.449    0.000    0.449    0.000 <ipython-input-425-f80e789bd32f>:17(binompmf)
  3448500    0.292    0.000    0.292    0.000 <ipython-input-415-ea75fd769ab5>:2(<lambda>)
   488372    0.200    0.000    0.256    0.000 <ipython-input-425-f80e789bd32f>:21(bigQ)
       64    0.007    0.000    0.008    0.000 <ipython-input-425-f80e789bd32f>:4(<listcomp>)
       64    0.007    0.000    0.007    0.000 <ipython-input-425-f80e789bd32f>:9(<listcomp>)
      128    0.001    0.000    0.001    0.000 {built-in method builtins.sum}
       64    0.000    0.000    0.008    0.000 <ipython-input-425-f80e789bd32f>:1(Z1plus)
       64    0.000    0.000    0.008    0.000 <ipython-input-425-f80e789bd32f>:6(Z1minus)
        1    0.000    0.000   16.286   16.286 <ipython-input-445-f1a68aa578ec>:2(<listcomp>)
        1    0.000    0.000   16.286   16.286 {built-in method builtins.exec}
        1    0.000    0.000   16.286   16.286 <ipython-input-445-f1a68aa578ec>:1(get_max_final)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.max}
        1    0.000    0.000   16.286   16.286 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
Performance is decent until about n = 60, where it takes ~1 s per data point. Spent some time on 12/24 optimizing...
%%time
ds = range(2, 62)
deltas = [get_max_final(d) for d in ds]
# plt.plot(ds, deltas, label="p=2, girth > 5")
deltas2 = [d**0.5 * delt for d, delt in zip(ds, deltas)]
plt.plot(ds, deltas2, label="2-step, girth > 5")
plt.grid()
CPU times: user 14.9 s, sys: 24 ms, total: 14.9 s Wall time: 14.9 s
ds = range(2, 45)
deltas = [get_max_final(d) for d in ds]
plt.plot(ds, deltas, label="p=2, girth > 5")
fn = calc_performance_y_triangles_curried(0)
deltas = [max(test_all_thresholds(d, fn)) for d in ds]
plt.plot(ds, deltas, label="p=1, girth > 3")
plt.title("Performance vs D")
plt.xlabel("D")
plt.ylabel("performance $\delta$")
plt.legend()
plt.grid(True)
NameError: name 'calc_performance_y_triangles_curried' is not defined (this cell was run before that helper was defined in the session)
ds = range(2, 50)
deltas = [d**0.5 * max([final(d-1, t, t) for t in range(d+2)]) for d in ds]
plt.plot(ds, deltas, label="2-step, girth > 5")
fn = calc_performance_y_triangles_curried(0)
cs = [d**0.5*max(test_all_thresholds(d, fn)) for d in ds]
plt.plot(ds, cs, label="1-step, girth > 3")
plt.title("Scaled performance vs D (might tend towards constant $C = \delta \sqrt{D}$)")
plt.xlabel("D")
plt.ylabel("performance $\delta \sqrt{D}$")
plt.legend()
plt.grid(True)
August 20 2020
This presentation was given in the HRSS paper (https://arxiv.org/pdf/1402.2543.pdf); it has fewer terms.
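Transcribed from the code below, the HRSS expression for the performance at threshold $t$ is $$ \delta(t) = 4^{1-D} \binom{D-1}{t-1} \left( \sum_{k=0}^{t-2} \binom{D-1}{k} - \sum_{k=t}^{D-1} \binom{D-1}{k} \right) $$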
def zi_zj(d, t):
sum1 = sum([binom(d-1, k) for k in range(0, t-1)])
sum2 = sum([binom(d-1, k) for k in range(t, d)])
return 4**(1-d) * binom(d-1, t-1) * (sum1 - sum2)
import functools
import matplotlib.pyplot as plt
from scipy.special import binom
import numpy as np
from numpy import pi, exp, e
This formula is even simpler, assuming the threshold is above $D/2$, also from HRSS.
@functools.lru_cache()
def zi_zj_simpler(d, t):
sum1 = sum([binom(d-1, k) for k in range(d-t+1, t)])
return 4**(1-d) * binom(d-1, t-1) * sum1
D=20
plt.scatter(range(D+2), [zi_zj(D,t) for t in range(D+2)])
plt.scatter(range(D+2), [zi_zj_simpler(D,t) for t in range(D+2)])
This plot checks that the Hastings formula and the HRSS formula agree.
for D in range(20):
results = test_all_thresholds(D, calc_performance)
# print_max(D, results)
# plot_by_T(D, results)
plt.plot(range(D+2), [zi_zj(D,t)-results[t] for t in range(D+2)])
# plt.legend()
outs = [0]*5
rang = range(5, 500)
for D in rang:
inps = range(int(D/2), int(D/2+D**0.5))
results= [zi_zj_simpler(D, t) for t in inps]
outs.append(inps[np.argmax(results)])
plt.plot(rang, [i**0.5*(outs[i]/i-0.5) for i in rang], label="best threshold")
plt.plot(rang, [1/2 for d in rang], label="k=0.5")
plt.plot(rang, [.4386 for d in rang], label="k=0.4386") # my guess at the best threshold
plt.title("best threshold $T = D/2 + k\sqrt{D}$ vs $D$")
plt.xlabel("D")
plt.ylabel("$k$")
plt.legend()
The above graph shows the best threshold at $0.4 \le k \le 0.5$.
def test(n, k):
return (2/(n*pi))**0.5 * exp(-2*k*k)
def exact(n,k):
return 2**-n *binom(n, n/2 + k*n**0.5)
def test2(n, k):
return (n/2 - k*n**0.5) / (n/2 + k*n**0.5 + 1)
Compare $\binom{n}{k}$ with its asymptotic approximation; the error terms scale as $n^{-3/2}$.
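The approximation under test is the de Moivre-Laplace local limit $$ 2^{-n} \binom{n}{n/2 + k\sqrt{n}} = \sqrt{\frac{2}{\pi n}}\, e^{-2k^2} + O(n^{-3/2}), $$ which is what `exact` and `test` above compute.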
# k above ~ 1/e it will go below zero!
for k in np.linspace(0.43, 0.47, 11):
f = lambda n: (test(n,k) - exact(n,k))*n**1.5
inps = np.linspace(20, 1000, 100)
plt.plot(inps, f(inps), label="k=" + str(k))
plt.legend()
plt.grid()
from scipy.special import erf
for n in np.linspace(10, 19, 10)**2:
sumexact = lambda k: 2**-n*sum([binom(n, i) for i in range(int(n/2 - k*n**0.5), int(n/2 + k*n**0.5))])
sumapprox = lambda k: erf(k*2**0.5)
inps = np.linspace(0.42, 0.45, 20000)
plt.scatter(inps, [(sumapprox(i)-sumexact(i))*n**0.5 for i in inps], label="n=" + str(n))
plt.legend()
plt.grid()
def Phi(z):
return 0.5*(1 + erf(z*2**-0.5))
def normal_approx(D, t):
n = D-1
mu = n/2
var = n/4
integral = Phi((t-1 - mu)*var**-0.5) - Phi((D-t - mu)*var**-0.5)
return 2**(1-D)*binom(D-1, t-1)*integral
def normal_approx_improved(D, t):
k= (t - D/2)*D**-0.5
n = D-1
mu = n/2
var = n/4
integral = erf(2**-0.5 * (2*k*(D/(D-1))**0.5 - (D-1)**-0.5))
#same as below
#integral = Phi((t-1 - mu)*var**-0.5) - Phi((D-t - mu)*var**-0.5)
return (2/(pi*(D-1)))**0.5 * exp((-1/(2*n)) * (D-2*t + 1)**2) * integral
def normal_approx_symmetric(D, t):
k= (t - D/2)*D**-0.5
n = D-1
mu = n/2
var = n/4
integral = erf(k*2**0.5)
return (2/(pi*(D-1)))**0.5 * exp((-1/(2*n)) * (D-2*t + 1)**2) * integral
def normal_approx_sym2(D, t):
k= (t - D/2)*D**-0.5
return (2/(pi*(D-1)))**0.5 *exp(-2*k*k)* erf(k*2**0.5)
def normal_easy(k):
return (2/pi)**0.5 * exp(-2*k*k)*erf(k*2**0.5)
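Chaining these approximations together (each step is checked in the plots below), the performance at threshold $T = D/2 + k\sqrt{D}$ is approximately $$ \delta \approx \frac{1}{\sqrt{D}} \sqrt{\frac{2}{\pi}}\, e^{-2k^2} \operatorname{erf}\big(\sqrt{2}\,k\big), $$ i.e. normal_easy(k) divided by $\sqrt{D}$.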
binom(1000, 500)
2.7028824094560503e+299
inps = np.linspace(0.4, 0.45, 100)
kmax = lambda k: exp(-4*k*k)/(k*pi) # when f'(k) = 0
plt.plot(inps, normal_easy(inps))
plt.plot(inps, kmax(inps))
plt.grid()
exp(.01)
1.010050167084168
# this is f'(k)
simple = lambda k: exp(-2*k*k) - (2*pi)**0.5 * k * erf(k*2**0.5)
inps = np.linspace(0, 1, 200)
plt.plot(inps, simple(inps))
plt.grid()
# asymptotic of best...
# matches hastings numerical approach of 0.34
rang = np.arange(20000)/(20000/1.4) - 0.7
results = [normal_easy(k) for k in rang]
plt.plot(rang, results)
print("k_max =", rang[np.argmax(results)])
print("performance/sqrt{D} =",max(results))
plt.grid()
k_max = 0.43848
performance/sqrt{D} = 0.3364933629451221
for D in [50, 100, 200]:
kmin =-1
kmax=1
rang = np.arange(int(D/2 + kmin*D**0.5), int(D/2 + kmax*D**0.5))
krang = (rang-D/2)*D**-0.5
plt.plot(krang, [zi_zj(D,t) for t in rang], label="D=" + str(D))
plt.plot(krang, [normal_approx(D, t) for t in rang])
plt.plot(krang, [normal_approx_improved(D, t) for t in rang])
plt.scatter(krang, [D**-0.5*normal_easy((t-0.5*D)*D**-0.5) for t in rang], s=3)
plt.legend()
plt.title("all results (performance vs D)")
plt.xlabel("k, $T = D/2 + k\sqrt{D}$")
plt.ylabel("expected cut fraction - 1/2")
plt.grid()
for D in [50, 100, 200]:
kmin =-1
kmax=1
rang = np.arange(int(D/2 + kmin*D**0.5), int(D/2 + kmax*D**0.5))
krang = (rang-D/2)*D**-0.5
plt.grid()
plt.scatter(krang, [D**1.5*normal_approx(D, t) - D**1.5*zi_zj(D, t) for t in rang], s=3, label="D=" + str(D))
plt.title("error (normal -> zi_zj), which is $\propto D^{3/2}$")
plt.xlabel("k, $T = D/2 + k\sqrt{D}$")
plt.ylabel("$D^{3/2}$ * (expected cut fraction - 1/2)")
plt.legend()
# approximation is ~ 0.15 / n^3/2 ... approx bigger by 1/\sqrt{n} * 0.15 / n ... maximized at k_max
# could be related to D vs D-1 sub
for D in [50, 100, 200, 400]:
kmin =-1
kmax=1
rang = np.arange(int(D/2 + kmin*D**0.5), int(D/2 + kmax*D**0.5))
krang = (rang-D/2)*D**-0.5
plt.grid()
plt.scatter(krang, [D**1.5*normal_approx_improved(D, t) - D**1.5*normal_approx(D, t) for t in rang], s=3, label="D=" + str(D))
plt.title("error (normal improved -> normal), which is $\propto D^{3/2}$")
plt.xlabel("k, $T = D/2 + k\sqrt{D}$")
plt.ylabel("$D^{3/2}$ * (expected cut fraction - 1/2)")
plt.legend()
# approximation is ~ 0.12 / n^3/2 ... could be related to D vs D-1 sub
for D in [50, 100, 200]:
kmin =-1
kmax=1
rang = np.arange(int(D/2 + kmin*D**0.5), int(D/2 + kmax*D**0.5))
krang = (rang-D/2)*D**-0.5
plt.grid()
plt.scatter(krang, [D*normal_approx_symmetric(D, t) - D*normal_approx_improved(D, t) for t in rang], s=3, label="D=" + str(D))
plt.title("error (normal symmetric -> normal improved), which is $\propto D$")
plt.xlabel("k, $T = D/2 + k\sqrt{D}$")
plt.ylabel("$D$ * (expected cut fraction - 1/2)")
plt.legend()
# approximation is ~ 0.7 / n .. looks like .7* e^-2k^2 / n .. makes sense it's missing the binomial expression
for D in [50, 100, 200]:
kmin =-1
kmax=1
rang = np.arange(int(D/2 + kmin*D**0.5), int(D/2 + kmax*D**0.5))
krang = (rang-D/2)*D**-0.5
plt.grid()
plt.scatter(krang, [D*normal_approx_sym2(D, t) - D*normal_approx_symmetric(D, t) for t in rang], s=3, label="D=" + str(D))
plt.title("error (normal sym2 -> normal symmetric), which is $\propto D$")
plt.xlabel("k, $T = D/2 + k\sqrt{D}$")
plt.ylabel("$D$ * (expected cut fraction - 1/2)")
plt.legend()
# approximation is ~ -0.35 / n .. looks like (e^-4k^2 - e^-2k^2) / n.. but it's < 0 !!!
# where does the negative influence come from?
test2 = lambda x: 1.5*exp(-2*x*x) * (exp(-2*x*x)-1) # can I derive this approximation? e^-2k^2(0.6 - 2.5x^2)?
test3 = lambda x: -1.5*erf(x*2**0.5)*exp(-2*x*x)*x # systematic undershoot from e^-2k^2 (1 - 2k/ \sqrt{D})
# threshold is slightly high... is actually D/2 - 1/2
plt.plot(np.linspace(kmin, kmax, 300), test3(np.linspace(kmin, kmax, 300)))
# TEST binom coeff approx
inps = range(10, 1000, 1)
k=0.3
plt.scatter(inps, [n*((2/pi/n)**0.5 * exp(-2*k*k) - 2**-n * binom(n, int(n/2 + k*n**0.5))) for n in inps])
plt.grid()
# plt.ylim(-.01, .01)
.33649*.98
0.3297602
g = lambda k, D: Phi(2*k - (D-1)**-0.5) - Phi(-2*k + 3*(D-1)**-0.5)
gs = lambda k, D: Phi(2*k) - Phi(-2*k)
k=0.6
inps = range(10, 1000)
plt.plot(inps, [D**0.5*(gs(k, D) - g(k, D)) for D in inps])
(1/1000)**.5
0.03162277660168379
# another test
k=0.1
D=1000
erf(2**0.5 * k * (1 + 1/(D-1))**0.5) / erf(2**0.5 * k)
1.0004937342143372
for D in [50, 100, 200]:
kmin =-1
kmax=1
rang = np.arange(int(D/2 + kmin*D**0.5), int(D/2 + kmax*D**0.5))
krang = (rang-D/2)*D**-0.5
plt.grid()
plt.scatter(krang, [D*normal_easy((t-0.5*D)*D**-0.5) - D**1.5*normal_approx_sym2(D, t) for t in rang], s=3, label="D=" + str(D))
# normal easy is off by 0.15 / n^3/2 .. from the D vs D-1 thing
plt.title("error (normal easy -> normal sym2), which is $\propto D^{3/2}$")
plt.xlabel("k, $T = D/2 + k\sqrt{D}$")
plt.ylabel("$D^{3/2}$ * (expected cut fraction - 1/2)")
plt.legend()
for D in range(50, 500, 50):
kmin =-1
kmax=1
rang = np.arange(int(D/2 + kmin*D**0.5), int(D/2 + kmax*D**0.5))
krang = (rang-D/2)*D**-0.5
plt.grid()
plt.scatter(krang, [(D**0.5*normal_easy((t-0.5*D)*D**-0.5) - D*zi_zj(D, t)) for t in rang], s=3, label="D=" + str(D))
test = lambda x: 2.3*exp(-2*x*x) * (exp(-2*x*x)-0.7) # can I derive this approximation? e^-2k^2(0.6 - 2.5x^2)?
plt.plot(np.linspace(kmin, kmax, 300), test(np.linspace(kmin, kmax, 300)))
plt.title("all error (normal easy -> zi_zj), which is $\propto D$")
plt.legend()
# approximation is ~ 0.65 / n at k=0... bigger by 1/\sqrt{n} * 0.65 / \sqrt{n}
# at k_max, it's much smaller..
# the approximation is e^-2k^2 (a - bk^2) / n ..
$0.33/\sqrt{D}$ is an overshoot by $0.3/D$
0.5*1000**-0.5
0.015811388300841896
.336*.7
0.2352
exp(-2*0.3*0.3)
0.835270211411272
erf(0.5*2**0.5)
0.6826894921370859
.68*.7
0.476
.84-.47
0.37
1/1000**0.5
0.03162277660168379
Similar to Hastings' $D = 3, 4$ algorithm: start uniform. "Rocks and pebbles" is what I call it.
def measure(trials, D, c):
successes = 0
for _ in range(trials):
rands = np.random.rand(2*D)*2 - 1
a = rands[0]
k = sum(rands[1:D])
b = rands[D]
l = sum(rands[D+1:])
if np.sign(a + c*k + c*b) != np.sign(b + c*l + c*a):
successes += 1
return (successes/trials - 0.5)*D**0.5
inps = range(10, 100, 10)
plt.plot(inps, [measure(100000, D, -D**-0.5) for D in inps])
for D in [10, 30, 100]:
cs = np.linspace(-0.5, 0, 100)
outs = []
for c in cs:
trials = 10000
outs.append(measure(trials, D, c))
plt.plot(cs, outs, label="D=" + str(D))
plt.legend()
plt.grid()
.173*3**-0.5
0.09988159656980525
looks like $|c| \approx 1/\sqrt{D}$ (with $c$ negative)
Initial checks: it doesn't look like this performs much better (or worse) than HRSS; the best is still $> 1/2 + 0.3/\sqrt{D}$.
As $D$ gets large, $k$ and $l$ (sums of $D-1$ uniforms) become approximately normal.
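In formula form (a transcription of the code below, with $\Phi$ the standard normal CDF), the probability that the two endpoints end up with opposite signs is $$ P = \Phi_{ba}(1 - \Phi_{ab}) + \Phi_{ab}(1 - \Phi_{ba}), \qquad \Phi_{ba} = \Phi\!\left(\frac{b+r}{s\sqrt{D-1}} - \frac{a}{zs}\right), \quad \Phi_{ab} = \Phi\!\left(\frac{a+r}{s\sqrt{D-1}} - \frac{b}{zs}\right) $$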
# r is triangle distribution
def P(a, b, z, s, D, r=0):
rtD1 = (D-1)**0.5
bma = Phi((b+r)/s/rtD1 - a/z/s)
amb = Phi((a+r)/s/rtD1 - b/z/s)
return bma*(1 - amb) + amb*(1 - bma)
# should be identical to P
# so need to average < erf x erf y > over distribution to get performance
def P2(a, b, z, s, D):
rtD1 = (D-1)**0.5
return 0.5- 0.5*erf(2**-0.5*(a/s/rtD1 - b/z/s))*erf(2**-0.5*(b/s/rtD1 - a/z/s))
from math import factorial
from numpy import sign
# 2* irwinhall - 1
# sum of U(-1, 1)
def sumuniform(n, inp):
u = (inp + n)/2
return 2**-1 * (1/2) * factorial(n-1)**-1 * sum([binom(n, k) * (-1)**k * (u-k)**(n-1) * sign(u-k) for k in range(0, n+1)])
n=2
inps = np.linspace(-n, n, 1000)
plt.plot(inps, [sumuniform(n, i) for i in inps])
def get_perf_uniform(z, D, dim, n=0):
# try uniform
s = 3**-0.5
out = 0
    rs = [sumuniform(n, r) for r in np.linspace(-n, n, n*dim)]  # NOTE: these are density values of the sum-of-uniforms; below they are passed to P as the offset r, which looks unintended (weights were probably meant)
for a in np.linspace(-1, 1, dim):
for b in np.linspace(-1, 1, dim):
# run once if n=0
out += P(a, b, z, s, D, 0)
for r in rs:
out += P(a, b, z, s, D, r)
return out/dim/dim/(len(rs) + 1) - 0.5
inps = range(10, 410, 30)
plt.plot(inps, [D**0.5*get_perf_uniform(1, D, 100) for D in inps])
plt.grid()
zs = np.linspace(0.01, 3, 40)
for D in [100, 200, 400]:
plt.plot(zs, [D**0.5*get_perf_uniform(z, D, 100) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
D=100
zs = np.linspace(0.01, 3, 40)
for n in range(0, 40, 5):
plt.plot(zs, [D**0.5*get_perf_uniform(z, D, 20, n) for z in zs], label="n=" + str(n))
plt.grid()
plt.legend()
KeyboardInterrupt (cell interrupted partway through get_perf_uniform; it was too slow)
# def sample_perf_normal(z, D, dim):
# # try gaussian
# s = 1
# out = 0
# for a in np.random.normal(size=dim):
# for b in np.random.normal(size=dim):
# out += P(a, b, z, s, D)
# return out/dim/dim - 0.5
# # this is too slow and too high-variance!!
# sample_perf_normal(1, 10, 300)
def approx_perf_normal(z, D, dim):
# try discrete gaussian
s = 1
out = 0
bd = 5
for a in np.linspace(-bd, bd, dim+1):
for b in np.linspace(-bd, bd, dim+1):
wt = (Phi(b + bd/dim) - Phi(b - bd/dim)) * (Phi(a + bd/dim) - Phi(a - bd/dim))
out += wt * P(a, b, z, s, D)
return out - 0.5
# normal .. still not too good. worse than uniform
inps = range(10, 410, 30)
plt.plot(inps, [D**0.5*approx_perf_normal(1, D, 200) for D in inps])
plt.grid()
KeyboardInterrupt (cell interrupted partway through approx_perf_normal; it was too slow)
zs = np.linspace(0.01, 3, 40)
for D in [100, 200, 400, 1000, 100000]:
plt.plot(zs, [D**0.5*approx_perf_normal(z, D, 100) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
1/pi
zs = np.linspace(0.99, 1.01,100)
out = [D**0.5*approx_perf_normal(z, D, 200) for z in zs]
print(zs[np.argmax(out)])
print(max(out))
def approx_perf_hrss(z, D):
# try threshold alg (only +1 and -1)
s = 1
out = 0
for a in [-1, 1]:
for b in [-1, 1]:
out += P(a, b, z, s, D)
return out/4 - 0.5
# hrss works! and performs about as well... as a simple "flip". Changing z is like changing threshold..
inps = range(10, 410)
plt.plot(inps, [D**0.5*approx_perf_hrss(1, D) for D in inps], label="z=" + str(z))
plt.grid()
zs = np.linspace(0.01, 3, 40)
for D in [100, 200, 400]:
plt.plot(zs, [D**0.5*approx_perf_hrss(z, D) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
def approx_perf_triang(z, D, dim):
s = 6**-0.5
out = 0
# it's not balanced for some reason
sumwts = 4*sum(1-abs(np.linspace(-1, 1, dim)))**2
for a in np.linspace(-1, 1, dim):
for b in np.linspace(-1, 1, dim):
wt = 4*(1-abs(a))*(1-abs(b))
out += wt*P(a, b, z, s, D)
return out/sumwts - 0.5
inps = range(10, 410, 10)
plt.plot(inps, [D**0.5*approx_perf_triang(1, D, 100) for D in inps], label="z=" + str(z))
plt.grid()
zs = np.linspace(0.01, 3, 40)
for D in [100, 200, 400]:
plt.plot(zs, [D**0.5*approx_perf_triang(z, D, 100) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
def approx_perf_reverse_triang(z, D, dim):
s = 2**-0.5
out = 0
# it's not balanced for some reason
sumwts = 4*sum(abs(np.linspace(-1, 1, dim)))**2
for a in np.linspace(-1, 1, dim):
for b in np.linspace(-1, 1, dim):
wt = 4*abs(a)*abs(b)
out += wt* P(a, b, z, s, D)
return out/sumwts - 0.5
inps = range(10, 40000, 500)
plt.plot(inps, [D**0.5*approx_perf_reverse_triang(1, D, 100) for D in inps])
plt.grid()
zs = np.linspace(0.01, 3, 40)
for D in [100, 200, 1000,100000]:
plt.plot(zs, [D**0.5*approx_perf_reverse_triang(z, D, 100) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
import itertools
from collections import Counter
def approx_perf_discrete(z, D, opts, n=0):
# generalized threshold alg (only certain opts available)
s = np.var(opts)**0.5
out = 0
for a in opts:
for b in opts:
if n == 0:
out += P(a, b, z, s, D)
else:
cnt = get_all_rs(opts, n)
for ele in cnt:
out += cnt[ele]/len(opts)**n * P(a, b, z, s, D, ele)
return out/(len(opts)**2) - 0.5
def get_all_rs(opts, n):
cnt = Counter()
for k in itertools.product(opts, repeat=n):
cnt[sum(k)] += 1
return cnt
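For example (my addition), get_all_rs just tabulates the distribution of a sum of n draws from opts:
print(get_all_rs([-1, 1], 2))  # Counter({0: 2, -2: 1, 2: 1})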
#hrss, modified
inps = range(100, 2000, 10)
plt.plot(inps, [D**0.5*approx_perf_discrete(1, D, [-1, 1]) for D in inps])
plt.grid()
# comparison to large-degree formula
z = 0.9
fx = lambda D: D**0.5 * 0.25 * (erf(2**-0.5 * (1/z + D**-0.5))**2 - erf(2**-0.5 * (1/z - D**-0.5))**2)
# from taylor series expansion (note: this matches the expression from asymptotic analysis, z -> 1/2k)
taylor = 2*pi**-0.5 * exp(-0.5 /z/z) * erf(2**-0.5 /z) * 2**-0.5
inps = np.linspace(100, 20000)
plt.plot(inps, fx(inps), label="HRSS large-degree formula")
plt.plot(inps, [D**0.5*approx_perf_discrete(z, D, [-1, 1]) for D in inps], label="approx_perf_discrete")
plt.plot(inps, [taylor for _ in inps], label='first taylor approx')
plt.grid()
plt.legend()
#hrss, modified for triangles
inps = range(2, 30, 1)
plt.plot(inps, [D**0.5*approx_perf_discrete(1.1, D, [-1, 1], 3) for D in inps])
plt.grid()
# try triangles
ns = np.random.triangular(0, 1, 1, 100)
inps = range(100, 2000, 20)
plt.plot(inps, [D**0.5*approx_perf_discrete(1.1, D, list(ns) + list(-ns)) for D in inps])
plt.grid()
inps = range(100, 1000, 10)
plt.plot(inps, [D**0.5*approx_perf_discrete(1.1, D, [-1, 0, 1]) for D in inps])
plt.grid()
# with triangles
inps = range(3, 50)
plt.plot(inps, [D**0.5*approx_perf_discrete(1.1, D, [-1, 0, 1], 5) for D in inps])
plt.grid()
# squared uniform distribution
r = (2*np.random.sample(10) - 1)**2
ns = list(r) + list(-r)
zs = np.linspace(0.01, 3, 80)
for D in [200, 1000,4000, 100000]:
plt.plot(zs, [D**0.5*approx_perf_discrete(z, D, ns) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
# random inputs
zs = np.linspace(0.01, 3, 80)
for D in [200, 1000,4000, 100000]:
plt.plot(zs, [D**0.5*approx_perf_discrete(z, D, [0.8, 0.7, 0.6, -0.6, -0.7, -0.8]) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
inps = range(100, 2000, 10)
ns = np.random.normal(size=10)
plt.plot(inps, [D**0.5*approx_perf_discrete(1, D, np.concatenate((ns, -ns))) for D in inps])
plt.grid()
# normal performs much better than I expected
non-symmetric distributions?
ns = np.random.triangular(-1, 0.5, 1, 30)  # the mode argument was missing in the original; 0.5 is an arbitrary skewed choice
ns = list(ns) + [-sum(ns)]
inps = range(100, 2000, 20)
plt.plot(inps, [D**0.5*approx_perf_discrete(1, D, ns) for D in inps])
plt.grid()
plt.figure()
# random inputs
zs = np.linspace(0.01,3, 80)
for D in [200, 1000,4000, 100000]:
plt.plot(zs, [D**0.5*approx_perf_discrete(z, D, ns) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
ns = [-1, -0.5, 0.5, 1]
inps = range(100, 2000, 20)
plt.plot(inps, [D**0.5*approx_perf_discrete(1, D, ns) for D in inps])
plt.grid()
plt.figure()
# random inputs
zs = np.linspace(0.01,3, 80)
for D in [200, 1000,4000, 100000]:
plt.plot(zs, [D**0.5*approx_perf_discrete(z, D, ns) for z in zs], label="D=" + str(D))
plt.grid()
plt.legend()
def simple(f, g, p, D):
"""z=1, using alpha * (D-1) triangles"""
# goal is to minimize this fn
erff = erf(2**-0.5 * (f-p))
erfg = erf(2**-0.5 * (g-p))
return erff * erfg - (2/pi)**0.5 * (D-1)**-0.5 * (f*erff * exp(-0.5 * (g-p)**2) + g*erfg * exp(-0.5 * (f-p)**2))
inps = np.linspace(5, 100)
plt.plot(inps, [simple(1, 1, 0, d) for d in inps])
# erf(x)erf(y) sums to 0 for symmetric distributions
# erf(x+p) erf(y+p) ?
# erf(x+p) sum erf(p+s)erf(p-s)
inps = np.linspace(-5, 5, 1000)
for p in [-1, -0.5, -0.2, 0, 0.2, 0.5, 1]:
plt.plot(inps, erf(p+inps)+erf(p-inps), label="p=" + str(p))
plt.legend()
ps = np.linspace(-1, 1, 100)
plt.plot(ps, [sum(erf(2**-0.5*(p+inps))+erf(2**-0.5*(p-inps)))/len(inps) for p in ps])
plt.grid()
def calc(p):
fx = lambda f,g: erf((f-p)*2**-0.5)*erf((g-p)*2**-0.5)
out = fx(1,1) + fx(1,-1) + fx(-1,1) + fx(-1,-1)
return out/p/p/4
# for hrss, it's approximately 0.234 p^2 ... i.e. (2/(pi*e)) p^2 = (2/(pi*e)) alpha as expected
ps = np.linspace(-1, 1, 100)
plt.plot(ps, calc(ps))
The claim is that $P = 0.5 - 0.5\,(\text{erf terms})$, which as $D \to \infty$ approaches $0.5 - 0.5 \cdot 0.234\,\alpha$.
0.5 * 2/pi/e
0.11709966304863834
plt.plot(ps, [sum(erf(2**-0.5*(p+inps)))/len(inps) for p in ps])
2/pi/exp(1)
0.23419932609727667
fn(100, 100/2 + 100**0.5)
0.005965790127283934
# guess: performance - 0.5 approaches 1/2 * 0.234 alpha = 0.234 / jump
# z = 1 = 1/2k, k ~ 0.5
jump = 2
ds = range(jump, 300*jump, 5*jump)
outs = []
for d in ds:
y=int((d-1)/jump)
fn = calc_performance_y_triangles_curried(y)
outs.append(fn(d, d/2 + 0.5*d**0.5))
plt.plot(ds, outs)
plt.plot(ds, [-1/pi/e/jump]*len(ds))
plt.grid()
If $z = 1/(2k)$, then $1/z \to 2k$ and $(1/z)^2 \to 4k^2$.
zs = np.linspace(0.1, 2, 100)
plt.plot(zs, exp(1/zs/zs))
plt.yscale('log')
plt.grid()
# z = 1 = 1/2k, k ~ 0.5
z=0.4
jump = 2
ds = range(jump, 300*jump, 5*jump)
outs = []
for d in ds:
y=int((d-1)/jump)
fn = calc_performance_y_triangles_curried(y)
outs.append(fn(d, d/2 + 1/(2*z) *d**0.5))
plt.plot(ds, outs)
plt.plot(ds, [-1/pi/exp(1/z/z)/jump]*len(ds))
plt.grid()
# z = 1 = 1/2k, k ~ 0.5
z=1
jump = 2
ds = range(5*jump, 400*jump, 5*jump)
outs = []
for d in ds:
y=int((d-1)/jump)
fn = calc_performance_y_triangles_curried(y)
outs.append(d*fn(d, d/2 + 1/(2*z) *d**0.6666))
plt.plot(ds, outs)
# plt.plot(ds, [-1/pi/exp(1/z/z)/jump]*len(ds))
plt.grid()
/home/kunal/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: overflow encountered in double_scalars
/home/kunal/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: invalid value encountered in double_scalars
# z = 1 = 1/2k, k ~ 0.5
jump = 2
ds = range(10*jump, 200*jump, 10*jump)
outs = []
args = []
for d in ds:
y=int((d-1)/jump)
fn = calc_performance_y_triangles_curried(y)
outs.append(max(test_all_thresholds(d, fn))*d)
args.append((np.argmax(test_all_thresholds(d, fn)) - d/2)*d**-(2/3))
plt.plot(ds, outs)
# plt.plot(ds, [-1/pi/e/jump]*len(ds))
plt.grid()
plt.title("performance over D")
plt.figure()
plt.plot(ds, args)
plt.grid()
plt.title('looking for k, $t - D/2$')
Performance is $0.52/D + O(1/D^2)$... by the time $D > 1000$, the advantage is maybe gone.