#!/usr/bin/env python
# coding: utf-8

# We are looking at the problem Max $k$XOR, where each variable is involved in $D+1$ clauses and no two clauses overlap in more than one variable.
# 
# This notebook compares two one-local algorithms: Hirvonen's threshold algorithm and Farhi's QAOA at depth 1.

# In[1]:

from functools import lru_cache
from matplotlib.ticker import StrMethodFormatter
from mpl_toolkits.axes_grid1.inset_locator import (inset_axes, InsetPosition, mark_inset)
from numpy import cos, sin, tan, arctan as atan
from scipy.optimize import minimize
from scipy.special import binom, erf
from sympy import exp, simplify, expand, expand_trig, trigsimp
import matplotlib.pyplot as plt
import numpy as np
import sympy
import time

plt.rc('axes', axisbelow=True)

# In[2]:

print("Starting time:", time.ctime())

# # QAOA for Max 3XOR

# In[3]:

def qaoa_perf_3xor(s, D):
    return s / (1 + 3/(1 - s*s)**D)**0.5

# In[4]:

gamma = sympy.Symbol('gamma')
D_symbol = sympy.Symbol('D')
qaoa_perf_3xor(sympy.sin(gamma), D_symbol)

# In[5]:

Ds = [5, 10, 20, 50, 100, 500, 1000, 5000, 10000, 50000, 500000]
inps = np.linspace(0, 3, 100000)

# In[6]:

for D in Ds:
    outs = [qaoa_perf_3xor(np.sin(i*D**-0.5), D)*D**0.5 for i in inps]
    plt.plot(inps, outs, label="$D$=" + str(D))
plt.grid()
plt.legend()
plt.title("QAOA performance for $k=3$")
plt.xlabel(r"$\gamma \sqrt{D}$")
plt.ylabel(r"$C_{3,D}$: performance is $1/2 + C_{3,D}/\sqrt{D}$")
plt.savefig("images/QAOA_max3xor.png", dpi=300)

# We can also look at the derivative of performance with respect to $\gamma$:

# In[7]:

qaoa_perf_derivative = sympy.lambdify((gamma, D_symbol), sympy.Derivative(qaoa_perf_3xor(sympy.sin(gamma), D_symbol), gamma, evaluate=True))

# In[8]:

for D in Ds:
    outs = [qaoa_perf_derivative(i*D**-0.5, D) for i in inps]
    plt.plot(inps, outs, label="$D$=" + str(D))
    pt = np.argmin(np.abs(outs))
    print(D, inps[pt], outs[pt])
plt.grid()
plt.plot(inps, np.zeros_like(inps), color='black')
plt.legend()
plt.title("first derivative")

# Let's check to make sure this works given $\gamma$ and $\beta$, using the asymptotic values:

# In[9]:

def qaoa_perf_2var_3xor(beta, gamma, D):
    p, q = np.cos(2*beta), np.sin(2*beta)
    c, s = np.cos(gamma), np.sin(gamma)
    return 0.5*q*s*(3*p*p*c**D - q*q*c**(3*D))

# In[10]:

D = 100
gamma = 1.0535/D**0.5
beta = np.arcsin((1/(np.cos(gamma)**(2*D) + 3))**0.5)/2
print(beta, gamma)
print(qaoa_perf_2var_3xor(beta, gamma, D)*D**0.5)

# In[11]:

Ds = range(3, 500, 1)
plt.plot(Ds, [qaoa_perf_2var_3xor(0.29, 1.0535/D**0.5, D)*D**0.5 for D in Ds])
plt.grid()
plt.xlabel("degree $D$")
plt.ylabel(r"$C_{3,D}$: performance is $1/2 + C_{3,D}/\sqrt{D}$")
plt.title("QAOA for Max 3XOR")

# In[12]:

Ds = range(3000000, 1000000000, 100000)
plt.plot(Ds, [qaoa_perf_2var_3xor(0.29, 1.0535/D**0.5, D)*D**0.5 for D in Ds])
plt.grid()
plt.xlabel("degree $D$")
plt.ylabel(r"$C_{3,D}$: performance is $1/2 + C_{3,D}/\sqrt{D}$")
plt.title("QAOA for Max 3XOR")

# What's the asymptotic value of QAOA on Max 3XOR?

# In[13]:

def v(k):
    return k / (1 + 3*exp(k*k))**0.5

# In[14]:

k = sympy.Symbol('k')
der = sympy.lambdify(k, sympy.Derivative(v(k), k, evaluate=True))

# In[15]:

sympy.Derivative(v(k), k, evaluate=True)

# In[16]:

inps = np.linspace(0, 2, 10000000)
plt.plot(inps, der(inps))
plt.plot(inps, np.zeros_like(inps), color='black')
plt.grid()

# In[17]:

pt = np.argmin(np.abs(der(inps)))
inps[pt], der(inps)[pt]

# In[18]:

v(inps[pt])
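# As a quick cross-check of the asymptotic optimum found above by scanning the derivative on a grid,
# we can maximize $v$ directly with scipy.optimize.minimize_scalar. A minimal sketch: v_np is just a
# NumPy restatement of v so SciPy can evaluate it numerically.

from scipy.optimize import minimize_scalar

def v_np(x):
    # same expression as v(k) above, written with numpy instead of sympy
    return x / (1 + 3*np.exp(x*x))**0.5

res = minimize_scalar(lambda x: -v_np(x), bounds=(0, 2), method='bounded')
print("optimal gamma*sqrt(D):", res.x, "asymptotic C_3:", v_np(res.x))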
# What are the optimal values of QAOA for every degree?

# In[19]:

fmaker = lambda D: lambda i: -qaoa_perf_3xor(np.sin(i*D**-0.5), D)*D**0.5

# In[20]:

Ds = range(2, 10000)

# In[21]:

optimized_vals = [-minimize(fmaker(D), 1).fun for D in Ds]

# In[22]:

comparison = [qaoa_perf_2var_3xor(0.29, 1.0535/D**0.5, D)*D**0.5 for D in Ds]
assert np.all(np.array([o-c for o,c in zip(optimized_vals, comparison)]) > 0)

# In[23]:

plt.plot(Ds, optimized_vals)
plt.grid()

# # The threshold algorithm for Max 3XOR

# In[24]:

@lru_cache(maxsize=int(1e7))
def g(D, t):
    return 2**(-D) * sum([binom(D, i) for i in range(0, t+1)])

# In[25]:

@lru_cache(maxsize=int(1e7))
def Delta(D, t):
    return 2**(-D) * binom(D, t)

# In[26]:

@lru_cache(maxsize=int(1e7))
def general_improvement(D, t, k):
    val_delta = Delta(D, t)
    val_g = g(D, t)
    return 0.25 * ( (1 - 2*val_g + 2*val_delta)**k - (1 - 2*val_g)**k )

# What's the performance at $k=3$?

# In[27]:

for D in [5, 10, 20, 30, 40, 80, 100, 300, 1000]:
    inps = np.array(list(range(-1, D+2)))
    plt.plot((inps/D - 1/2)*D**0.5, [general_improvement(D, t, 3)*D**0.5 for t in inps], label="$D$=" + str(D))
plt.legend()
plt.ylabel(r"$C_{3,D}$: performance is $1/2 + C_{3,D}/\sqrt{D}$")
plt.xlabel(r"$a$: threshold is $D/2 + a/\sqrt{D}$")
plt.xlim(-2, 2)
plt.title("Performance of threshold algorithm for Max 3XOR")
plt.grid()

# Finding the optimal threshold:

# In[28]:

def get_argmax(D, k, negative_only=False):
    if negative_only:
        inps = range(-1, int(D/2))
    else:
        inps = range(-1, D+2)
    return inps[np.argmax([general_improvement(D, t, k) for t in inps])]

# In[29]:

inps = range(1, 500)
plt.scatter(inps, [(get_argmax(D, 3) - D/2)/D**0.5 for D in inps], s=3)
plt.xlabel("degree $D$")
plt.ylabel(r"$a$: threshold is $D/2 + a/\sqrt{D}$")
plt.title("Optimal threshold for Max 3XOR")
plt.grid()

# If we only look at thresholds below $D/2$, we get a clearer pattern:

# In[30]:

inps = range(1, 500)
plt.scatter(inps, [(get_argmax(D, 3, True) - D/2)/D**0.5 for D in inps], s=3)
plt.xlabel("degree $D$")
plt.ylabel(r"$a$: threshold is $D/2 + a/\sqrt{D}$")
plt.title("Optimal (negative) threshold for Max 3XOR")
plt.grid()

# # Comparison for Max 3XOR

# In[31]:

def get_max(D, k):
    inps = range(-1, D+2)
    return max([general_improvement(D, t, k) for t in inps])

# In[32]:

# Start at D=2 so the x-axis lines up with optimized_vals, which was computed for Ds = range(2, 10000).
inps = range(2, 501)
plt.plot(inps, [get_max(D, 3)*D**0.5 for D in inps], label='threshold')
plt.plot(inps, optimized_vals[:len(inps)], label='qaoa')
plt.legend()
plt.grid()
plt.title("Performance on Max 3XOR")
plt.xlabel("degree $D$")
plt.ylabel(r"$C_{3,D}$: performance is $1/2 + C_{3,D}/\sqrt{D}$")

# Zooming in at small degree:

# In[33]:

# Again start at D=2 to stay aligned with optimized_vals.
inps = range(2, 101)
plt.plot(inps, [get_max(D, 3)*D**0.5 for D in inps], label='threshold')
plt.plot(inps, optimized_vals[:len(inps)], label='qaoa')
plt.legend()
plt.grid()
plt.title("Performance on Max 3XOR")
plt.xlabel("degree $D$")
plt.ylabel(r"$C_{3,D}$: performance is $1/2 + C_{3,D}/\sqrt{D}$")

# When does QAOA win?

# In[34]:

np.array(inps)[[get_max(D, 3)*D**0.5 - o < 0 for D, o in zip(inps, optimized_vals[:len(inps)])]]

# # Threshold algorithm for Max $k$XOR

# The performance increases with $k$.
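# Before sweeping over $k$, a quick sanity check that $g(D, t)$ and $\Delta(D, t)$ above are the CDF and
# PMF of a Binomial$(D, 1/2)$ distribution, using scipy.stats. A minimal sketch; the alias binom_dist
# avoids clobbering scipy.special.binom imported at the top.

from scipy.stats import binom as binom_dist

D_check = 50
for t_check in [0, 10, 25, 40]:
    assert np.isclose(float(g(D_check, t_check)), binom_dist.cdf(t_check, D_check, 0.5))
    assert np.isclose(float(Delta(D_check, t_check)), binom_dist.pmf(t_check, D_check, 0.5))
print("g and Delta match the Binomial(D, 1/2) CDF/PMF")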
# In[35]:

inps = range(1, 200)
for k in range(2, 4):
    plt.plot(inps, [get_max(D, k)*D**0.5 for D in inps], label="threshold $k$=" + str(k))
plt.grid()
plt.legend()
plt.title("Performance on Max $k$XOR")
plt.xlabel("degree $D$")
plt.ylabel(r"$C_{k,D}$: performance is $1/2 + C_{k,D}/\sqrt{D}$")

# In[36]:

inps = range(20, 300)
for k in range(2, 10):
    plt.plot(inps, [get_max(D, k)*D**0.5 for D in inps], label="threshold $k$=" + str(k))
plt.grid()
plt.legend()
plt.title("Performance on Max $k$XOR")
plt.xlabel("degree $D$")
plt.ylabel(r"$C_{k,D}$: performance is $1/2 + C_{k,D}/\sqrt{D}$")

# In[37]:

inps = range(50, 200)
for k in range(100, 1000, 100):
    plt.plot(inps, [get_max(D, k)*D**0.5 for D in inps], label="threshold $k$=" + str(k))
plt.grid()
plt.legend()
plt.title("Performance on Max $k$XOR")
plt.xlabel("degree $D$")
plt.ylabel(r"$C_{k,D}$: performance is $1/2 + C_{k,D}/\sqrt{D}$")

# What about the optimal threshold?

# In[38]:

inps = range(50, 500)
for k in range(2, 10):
    plt.plot(inps, [abs(get_argmax(D, k) - D/2)/D**0.5 for D in inps], label='$k$=' + str(k))
plt.grid()
plt.legend()
plt.title("Optimal threshold for Max $k$XOR")
plt.xlabel("degree $D$")
plt.ylabel(r"$a$: threshold is $D/2 + a/\sqrt{D}$")

# ### Large degree approximation

# Write the threshold as $t = D/2 + \alpha \sqrt{D}$. As $D \to \infty$, $1 - 2g \to f(\alpha) = -\mathrm{erf}(\sqrt{2}\alpha)$ and $\Delta \to \mathrm{grtD}(\alpha)/\sqrt{D}$ with $\mathrm{grtD}(\alpha) = \sqrt{2/\pi}\, e^{-2\alpha^2}$. Maximizing the improvement over $\alpha$ gives the stationarity condition $\mathrm{grtD}(\alpha)/f(\alpha) = -2\alpha/(k-1)$, which is located graphically below.

# In[39]:

def f(alpha):
    return -erf(2**0.5 * alpha)

def grtD(alpha):
    return (2/np.pi)**0.5 * np.e**(-2*alpha*alpha)

def large_degree_improvement(alpha, k):
    return f(alpha)**k * (k/2) * (-2*alpha)/(k-1)

inps = np.linspace(-1, -0.1, 100000)
plt.plot(inps, [grtD(i)/f(i) for i in inps])
for k in range(2, 7):
    plt.plot(inps, -2*inps/(k-1), label='$k$=' + str(k))
    alpha = inps[[(grtD(i)/f(i) + 2*i/(k-1) > 0) for i in inps]][0]
    print('alpha (k=' + str(k) + '):', alpha)
    print('improvement*rtD:', large_degree_improvement(alpha, k))
plt.legend()
plt.grid()

# We can use SciPy to find the optimal threshold and value.
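# One option (a sketch of an alternative, not the approach used below) is to solve the stationarity
# condition grtD(alpha)/f(alpha) = -2*alpha/(k-1) directly with a bracketing root finder. The bracket
# (-2, -1e-6) is an assumption that happens to contain the root for the values of k shown here.

from scipy.optimize import brentq

for kval in [2, 3, 4, 10]:
    alpha_star = brentq(lambda a: grtD(a)/f(a) + 2*a/(kval - 1), -2, -1e-6)
    print("k:", kval, "alpha:", alpha_star, "improvement*rtD:", large_degree_improvement(alpha_star, kval))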
# In[40]:

fmaker = lambda k: lambda i: abs(grtD(i)/f(i) + 2*i/(k-1))

# In[41]:

for k in [2, 3, 4]:
    plt.plot(inps, fmaker(k)(inps), label='$k$=' + str(k))
plt.xlabel(r"$\alpha$")
plt.ylabel("difference (optimal at zero)")
plt.grid()
plt.legend()

# In[42]:

ks = range(2, 200)
# Nelder-Mead takes xatol/fatol (not ftol) as its tolerance options.
best_alphas = [minimize(fmaker(k), -1, method='Nelder-Mead', options={"xatol": 1e-10, "fatol": 1e-10}).x[0] for k in ks]
best_threshold_perf = [large_degree_improvement(a, k) for a, k in zip(best_alphas, ks)]

# In[43]:

for i in range(18):
    print("k:", ks[i], "alpha:", best_alphas[i], "perf:", best_threshold_perf[i])

# In[44]:

plt.scatter(ks, best_threshold_perf)
plt.grid()
plt.xlabel("$k$")
plt.ylabel(r"$C_{k}$: performance is $1/2 + C_{k}/\sqrt{D}$")
plt.title("Best asymptotic performance of threshold algorithm for Max $k$XOR")

# Comparing the expression with its large-degree asymptotic form:

# In[45]:

@lru_cache(maxsize=int(1e7))
def long_range_improvement(D, t, k):
    alpha = (t - D/2)*D**-0.5
    val_delta = (2/np.pi/D)**0.5 * np.e**(-2*alpha*alpha)
    val_g = 0.5 + 0.5*erf(2**0.5 * alpha) + 0.5 * val_delta
    return 0.25 * ( (1 - 2*val_g + 2*val_delta)**k - (1 - 2*val_g)**k )

# In[46]:

D = 1000
k = 8
inps = np.array(list(range(-1, D+2)))
plt.plot((inps/D - 1/2)*D**0.5, [long_range_improvement(D, t, k)*D**0.5 for t in inps], label='asymptotic')
plt.plot((inps/D - 1/2)*D**0.5, [general_improvement(D, t, k)*D**0.5 for t in inps], label='generic')
plt.xlim(-2, 2)
plt.grid()
plt.xlabel(r"$a$: threshold is $D/2 + a/\sqrt{D}$")
plt.ylabel(r"$C_{k}$: performance is $1/2 + C_{k}/\sqrt{D}$")
plt.legend()
plt.title("asymptotic (large-degree) vs generic performance of threshold algorithm; $D$=" + str(D))

# # QAOA for Max $k$XOR

# We have a relationship from $\gamma$ to $\beta$, so let's plot performance as a single-variable function:

# In[47]:

# @lru_cache(maxsize=int(1e7))
def gamma_from_beta(b, k, D):
    return atan( ((tan(2*b)**(-2) + 1)/k/D)**0.5 )

# In[48]:

def f(b, k, D):
    p, q = cos(2*b), sin(2*b)
    g = gamma_from_beta(b, k, D)
    c, s = cos(g), sin(g)
    return -0.25*s*1j*( (p + 1j*q*c**D)**k - (p - 1j*q*c**D)**k )

# In[49]:

for k in [3, 4, 5, 7, 10, 100]:
    plt.figure()
    Ds = [2, 5, 10, 20, 100, 500, 1000, 10000]
    for D in Ds:
        inps = np.linspace(1e-15, np.pi/4, 10000)
        outs = [complex(f(i, k, D)).real*D**0.5 for i in inps]
        plt.plot(inps/np.pi, outs, label="$D$=" + str(D))
        print('k:', k, "D:", D, 'beta/pi:', inps[np.argmax(outs)]/np.pi, 'gamma:', gamma_from_beta(inps[np.argmax(outs)], k, D), 'perf:', np.max(outs))
    plt.title("QAOA for Max $k$XOR, $k$=" + str(k))
    plt.grid()
    plt.ylabel("$C_{" + str(k) + ",D}$: performance is $1/2 + C_{" + str(k) + r",D}/\sqrt{D}$")
    plt.xlabel(r"$\beta / \pi$")
    plt.legend()

# Find the maximum performance of QAOA for any $k$ and $D$:

# In[50]:

fmaker = lambda k, D: lambda i: -complex(f(i, k, D)).real*D**0.5

# In[51]:

def get_qaoa_max(k, D):
    # coarse grid pass to find an approximate optimum, then refine with scipy.optimize.minimize
    inps = np.linspace(1e-15, np.pi/4, 100)
    outs = [complex(f(i, k, D)).real*D**0.5 for i in inps]
    return -minimize(fmaker(k, D), inps[np.argmax(outs)]).fun

# In[52]:

get_ipython().run_cell_magic('time', '', 'for k in range(2, 10):\n    Ds = range(2, 300)\n    qaoa_kxor = [get_qaoa_max(k,D) for D in Ds]\n    plt.plot(Ds, qaoa_kxor, label="$k$=" + str(k))\nplt.grid()\nplt.xlabel(\'degree $D$\')\nplt.legend()\nplt.ylabel("$C_{k,D}$: performance is $1/2 + C_{k,D}/\\sqrt{D}$")\nplt.title("Best QAOA performance for Max $k$XOR")\nplt.savefig(\'images/qaoa_best_maxkxor.png\', dpi=300)\n')

# ### Limiting behavior for QAOA on Max $k$XOR

# Taking $\gamma = t/\sqrt{D}$ and letting $D \to \infty$, $\cos^D\gamma \to e^{-t^2/2}$ and the $\gamma$-$\beta$ relation above becomes $\sin 2\beta = 1/(t\sqrt{k})$; substituting into $f$ gives the limiting expression below (defined only for $t^2 k \geq 1$).

# In[53]:

def qaoa_asymptotic_kxor(k, t):
    if t*t*k < 1:
        return 0
    term_1 = (t*t*k - 1)**0.5 + 1j*np.e**(-t*t/2)
    term_2 = (t*t*k - 1)**0.5 - 1j*np.e**(-t*t/2)
    return -0.25*1j * t**(1-k) * k**(-k/2) * (term_1**k - term_2**k)

# In[54]:

qaoa_asymptotic_maxes = []
for k in range(2, 200):
    inps = np.linspace(0, 2, 100000)
    outs = [qaoa_asymptotic_kxor(k, i).real for i in inps]
    plt.plot(inps, outs, label="$k$=" + str(k))
    if k < 20 or k % 5 == 0:
        t = inps[np.nanargmax(outs)]
        print("k=", str(k), "Max:", np.nanmax(outs), "at t=", t, 'beta=', np.arcsin(1/t/k**0.5)/2)
    qaoa_asymptotic_maxes.append(np.nanmax(outs))
plt.grid()
plt.xlabel(r"$t$: $\gamma = t/\sqrt{D}$")
plt.ylabel(r"$C_{k}$: performance is $1/2 + C_{k}/\sqrt{D}$")
# plt.legend()

# # Comparison for Max $k$XOR

# In[55]:

get_ipython().run_cell_magic('time', '', 'fig, axs = plt.subplots(3, 2, sharey=True, figsize=(10, 14))\naxs = axs.reshape(-1,)\nks = [3, 4, 5, 7, 10, 20]\nfor i in range(len(ks)):\n    ax = axs[i]\n    k = ks[i]\n    Ds = range(2,300)\n    ax.plot(Ds, [get_max(D, k)*D**0.5 for D in Ds], label=\'threshold\')\n    ax.plot(Ds, [get_qaoa_max(k,D) for D in Ds], label=\'qaoa\')\n    ax.grid()\n    ax.set_xlabel(\'degree $D$\')\n    ax.legend(loc=\'lower right\')\n    ax.set_ylabel("$C_{" + str(k) + ",D}$: performance is $1/2 + C_{" + str(k) + ",D}/\\sqrt{D}$")\n    ax.set_title("Best performance for Max $" + str(k) + "$XOR")\nfig.tight_layout()\nplt.savefig(\'images/comparison.png\', dpi=300)\n')

# So for $k > 4$, the QAOA does better than the threshold algorithm.
# This is true even in the large-degree limit.

# In[56]:

max([q for q in qaoa_asymptotic_maxes if q != np.inf])

# In[57]:

fig, ax1 = plt.subplots()
ks = range(2, 200)
ax1.scatter(ks[:len(best_threshold_perf)], best_threshold_perf, label='threshold', s=3)
ax1.scatter(ks[:len(qaoa_asymptotic_maxes)], qaoa_asymptotic_maxes, label='qaoa', s=3)
ax1.grid()
ax1.set_xlabel("$k$")
ax1.set_ylabel(r"$C_{k}$: performance is $1/2 + C_{k}/\sqrt{D}$")
ax1.legend(loc=0)
ax1.set_title("Asymptotic performance of algorithms for Max $k$XOR")

# Following https://scipython.com/blog/inset-plots-in-matplotlib/
ax2 = plt.axes([0, 0, 1, 1])
ip = InsetPosition(ax1, [0.69, 0.095, 0.25, 0.3])
ax2.set_axes_locator(ip)
max_k_vals_inset = 8
ax2.grid()
ax2.scatter(ks[:max_k_vals_inset], best_threshold_perf[:max_k_vals_inset], s=30, label='threshold')
ax2.scatter(ks[:max_k_vals_inset], qaoa_asymptotic_maxes[:max_k_vals_inset], s=30, label='qaoa')
ax2.set_xticks(np.arange(2, max_k_vals_inset+2, 2))
ax2.set_xticklabels(ax2.get_xticks(), backgroundcolor='w', fontsize=9)
ax2.set_yticklabels(ax2.get_yticks(), backgroundcolor='w', fontsize=9)
ax2.yaxis.set_major_formatter(StrMethodFormatter('{x:,.2f}'))  # 2 decimal places
plt.savefig('images/asymptotic_comparison.png', dpi=300)
plt.show()

# In[58]:

def logfit(x, y, deg=1):
    logx = np.log(x)
    logy = np.log(y)
    return np.polyfit(logx, logy, deg=deg)
    # poly = np.poly1d(coeffs)
    # yfit = lambda x: np.exp(poly(np.log(x)))
    # plt.loglog(inps, yfit(inps))
    # plt.loglog(inps, dataset)

# In[59]:

dataset = qaoa_asymptotic_maxes
inps = ks[:len(dataset)]
qaoa_coeffs = logfit(inps, dataset, deg=1)
print(qaoa_coeffs)

dataset = best_threshold_perf
inps = ks[:len(dataset)]
threshold_coeffs = logfit(inps, dataset, deg=1)
print(threshold_coeffs)

# In[60]:

plt.loglog(ks, best_threshold_perf, label='threshold')
plt.loglog(ks[:len(qaoa_asymptotic_maxes)], qaoa_asymptotic_maxes, label='qaoa')
plt.loglog(ks, [(np.e**threshold_coeffs[1])*k**threshold_coeffs[0] for k in ks], label='threshold fit')
plt.loglog(ks, [(np.e**qaoa_coeffs[1])*k**qaoa_coeffs[0] for k in ks], label='qaoa fit')
plt.legend()
plt.grid()

# This suggests that QAOA's performance scales as $k^{0.2}$ and the threshold algorithm's performance scales as $k^{0.1}$.

# # Comparison with the Parisi constant (upper bound at large degree)

# In[61]:

# %%time
# %run parisi.ipynb
# Precomputed values; see the commented-out %run of parisi.ipynb above.
ps = range(2, 35)
outs_scaled = np.array([1.0799048, 1.15044002, 1.16740424, 1.17328777, 1.17560844,
                        1.17659504, 1.1770332, 1.17723326, 1.17732629, 1.17737008,
                        1.17739087, 1.17740081, 1.17740557, 1.17740787, 1.17740898,
                        1.17740952, 1.17740978, 1.1774099, 1.17740996, 1.17740999,
                        1.17741001, 1.17741002, 1.17741002, 1.17741002, 1.17741002,
                        1.17741002, 1.17741002, 1.17741002, 1.17741002, 1.17741002,
                        1.17741002, 1.17741002, 1.17741002])

# In[62]:

parisi_clean_data = np.array([[p, x * p**0.5 / 2] for p, x in zip(ps, outs_scaled)])

# In[63]:

parisi_estimate = lambda x: (x*np.log(2)/2)**0.5

# In[64]:

size = len(parisi_clean_data)
ks = np.array(list(range(2, 200)))
assert np.allclose(ks[:size], parisi_clean_data[:, 0])
plt.plot(ks[:len(best_threshold_perf)], best_threshold_perf, label='threshold', linewidth=3)
plt.plot(ks[:len(qaoa_asymptotic_maxes)], qaoa_asymptotic_maxes, label='qaoa', linewidth=3)
plt.plot(parisi_clean_data[:, 0], parisi_clean_data[:, 1], label='optimal (parisi value)', color='green', linewidth=3)
plt.plot(ks, parisi_estimate(ks), label='parisi limit at high $k$', color='gray', alpha=0.3, linewidth=6)
plt.grid()
plt.xlabel("$k$")
plt.ylabel(r"$C_{k}$: performance is $1/2 + C_{k}/\sqrt{D}$")
plt.legend()
plt.title("Upper and lower bounds for Max $k$XOR")
plt.savefig('images/parisi_comparison.png', dpi=300)

# In[65]:

print("Ending time:", time.ctime())