#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
from scipy.optimize import minimize
from scipy.integrate import quad
from scipy.special import logsumexp
import multiprocessing
import time


# In[2]:


print("Starting time:", time.ctime())


# # Evaluating the Parisi formula for a pure p-spin glass

# The goal here is to calculate the zero-temperature Parisi constant, and compare it to the maximum satisfying fraction of randomXORSAT instances.

# ## Using the Parisi formula

# Wei-Kuo Chen says:
# > I am not aware of numerical simulations for the Parisi constant P(K) for large K. Nevertheless, this constant can be written as the Parisi formula, which is a convex optimization problem (see Theorem 1 in https://arxiv.org/pdf/1606.05335.pdf). With this, it should be fairly easy to run a numerical simulation to approximate P(K) for large K.
# 

# Paraphrased from the linked paper:
# > We introduce the space $U$ that collects all nonnegative and nondecreasing functions $f$ on $[0,1)$ that are right continuous and satisfy $\int_0^1 f(t)dt < \infty$. Let's say $f$ has $k$ jumps. Then it has value $m_i$ in region $[q_i,q_{i+1})$, given $q_0 = 0$ and $q_{k+1} = 1$, where $m_i$ and $q_i$ are increasing and nonnegative. The distance metric $d$ is the integral of $f$, or equivalently, $d = \sum_{i=0}^k m_i(q_{i+1} - q_i)$.

# $P(f) = \Psi_f(0, h) - 0.5 \int_0^1 t \xi^{''}(t)f(t) dt$

# $\xi(s) =\sum_{p\ge 2} c_p^2 s^p$ Where the $c_p$'s are the mixing constants (mixed vs pure spin glasses).

# $\partial_t \Psi_f(t,x) = -0.5\xi^{''}(t) 
# \Big(
# \partial_x^2 \Psi_f(t,x) + f(t) (\partial_x \Psi_f(t,x))^2
# \Big)$

# With boundary condition: $\Psi_f(1,x) = |x|$.

# This can be solved recursively, by using the transformation $f(t) \Psi = \log \Phi$ for a piecewise, continuous function $f$.

# This is the coefficient $a_{\ell} = \sqrt{ \xi^{'}(q_{\ell+1}) - \xi^{'}(q_{l}) }$

# In[3]:


global grid


# In[4]:


def a(qs, l, xiprime):
    return (xiprime(qs[l+1]) - xiprime(qs[l]))**0.5


# Let $\Psi_{k+1}(x) = abs(x)$.
# 
# For $1\le \ell \le k$, let $Exp(m_{\ell} \Psi_{\ell}(x)) = \mathbb{E}[Exp(m_{\ell} \Psi_{\ell+1}(x + a_{\ell} z) ]$, for standard Gaussian variable $z$.

# Let $\Psi_0(x) = \mathbb{E}[\Psi_{\ell+1}(x + a_{0} z)]$, since I let $m_0 = 0$.

# In[5]:


def psi0(qs, ms, xiprime, k, PDF_INPS):
    a_s = np.array([a(qs, l, xiprime) for l in range(len(qs)-1)])
    start = 0
    for i in range(len(grid)):
        start += a_s[i]*grid[i]
    start =  np.abs(start)
    # 1 to k
    for i in list(range(1, k+1))[::-1]:
        start = np.log(np.sum(np.exp(ms[i]*start)*PDF_INPS, axis=i))/ms[i]
        # scipy is slower
        #  start = logsumexp(ms[i]*start, b=PDF_INPS, axis=i)/ms[i]
    # 0
    start = np.sum(PDF_INPS*start, axis=0)
    return start


# The penalty term in the operator is $0.5 \int_0^1 f(t)  t \xi^{''}(t) dt = 0.5 \sum_{i=0}^k \int_{q_i}^{q_{i+1}} m_i t \xi^{''}(t) dt$

# In[6]:


def penalty(qs, ms, xiprimeprime, k):
    out = 0
    for i in range(k+1):
        integral = quad(lambda t: t * xiprimeprime(t), qs[i], qs[i+1])[0]
        out += ms[i] * integral
    return 0.5 * out


# This tests the ground state energy of inputs **m**, **q**:

# In[7]:


def make_test(xiprime, xiprimeprime, k, PDF_INPS):
    # the input here is a list of "adjustments" 
    # (m_1, m_2-m_1, ...,m_k-m_{k-1}, q_1, q_2-q_1,...,q_k-q_{k-1})
    def test(inp):
        assert len(inp) == 2*k
        inp_qs,inp_ms= inp[:k],inp[k:]

        # if bad input, return a large number
        if np.any(np.array(inp) < 0) or sum(inp_ms) > 2 or sum(inp_qs) > 1:
            return 10000

        qs = np.array([0,*[sum(inp_qs[:i+1]) for i in range(k)],1])
        ms = np.array([0,*[sum(inp_ms[:i+1]) for i in range(k)]])
        output =  psi0(qs, ms, xiprime, k, PDF_INPS) - penalty(qs, ms, xiprimeprime, k)
        return output
    return test


# ## Evaluating the Parisi formula

# In[8]:


# set parameters
# k is number of jumps
k=2
# if this range is too small, it fails at higher p
INPS = np.linspace(-20,20,400)
PDF_INPS = stats.norm.pdf(INPS)
PDF_INPS = PDF_INPS/np.sum(PDF_INPS)
assert np.allclose(sum(PDF_INPS),1), sum(PDF_INPS)
grid = np.meshgrid(*[INPS]*(k+1), indexing='ij')


# In[9]:


# pure p-spin model; p=2 is SK model
ps = range(2, 35)
# if C_psq is too low, my convergence is not very good.
C_psq = 2

def run(P):
#     xi = lambda x: x**P * C_psq
    xiprime = lambda x: P * (x**(P-1)) * C_psq
    xiprimeprime = lambda x: P * (P-1) * (x**(P-2)) * C_psq
    opt = minimize(make_test(xiprime, xiprimeprime, k, PDF_INPS), 
                       [np.random.random()/k for _ in range(2*k)], 
                       method='Powell', 
                       options={"xtol": 1e-10, "ftol":1e-14}
                      )
    print("p:", P, opt.fun)
    return {"x": opt.x, "fun": opt.fun}


# In[10]:


get_ipython().run_cell_magic('time', '', 'print("kXOR:")\nr = []\nfor P in ps:\n    r.append(run(P))\n')


# In[11]:


num_vert_plots = int(np.ceil(len(ps)/10))
fig, axs = plt.subplots(num_vert_plots, 2, figsize=(12, 4*num_vert_plots))

for idx in range(len(r)):
    P = ps[idx]
    if idx % 5 == 0:
        ax = axs[idx // 10, (idx % 10)//5]
        ax.grid()

    qs = np.array([0,*[sum(r[idx]['x'][:k][:i+1]) for i in range(k)]])
    ms = np.array([0,*[sum(r[idx]['x'][k:][:i+1]) for i in range(k)]])
    ax.step(qs, ms, where='post', label="p=" + str(P))
    if idx % 5 == 4:
        ax.legend()

ax.legend()


# I notice that the locations of the symmetry breaking points are often at very low values.  This may be related to the Auffinger Chen Zeng result that perturbing a solution to this variational near $1$ will reduce the energy. So perhaps perturbations very close to $0$ can also reduce the energy: https://doi.org/10.1002/cpa.21886

# ## Finding the large-p limit

# Let's look at the constants after dividing out by $c_p$:

# In[12]:


outs = np.array([i['fun'] for i in r])
outs_scaled = np.array(outs)* C_psq**-0.5


# In[13]:


0.763168*2**0.5


# In[14]:


for p, x in zip(ps, outs_scaled):
    print(p, x)


# In[15]:


guess = (2*np.log(2))**0.5


# In[16]:


print(guess)


# In[17]:


plt.plot(ps, [guess]*len(ps), 'b--', 
         color='black', label="$\sqrt{2 log 2}$", alpha=0.7)
plt.plot(ps, outs_scaled, 'g.-', ms=7,label="pure k-spin model")
plt.xlabel("$k$")
plt.ylabel("ground state energy density $P(k)$")
plt.title("Parisi value for the pure $k$-spin model")
plt.grid()
plt.legend()
plt.savefig('images/parisi_value.png', dpi=300)


# ## Relationship with $c_{p}$

# I ran the above several times for different $c_p$. 
# The asymptotic value seems to depend on the constant I use. If $c_p^2 < 2 log(2)$, The energy is $log(2) + c_p^2 / 2$. Otherwise, the energy is $\sqrt{ 2 c_p^2 log(2)}$.

# In[18]:


# this is what I observed after running for many different c_p^2
inps = np.linspace(1e-10, 5, 100000)
f1 = lambda x: x**-0.5 * (np.log(2) + x/2)
f2 = lambda x: x**-0.5 * (2 * x * np.log(2))**0.5

plt.plot(inps, [f1(i) if i < 2*np.log(2) else f2(i) for i in inps], 
         label='i observed this scaling',
         linewidth=8, alpha=0.3, color='black')
plt.plot(inps, f1(inps), label='$log 2/C_p +C_p/2$', linewidth=2)
plt.plot(inps, f2(inps), label='$\sqrt{2 log 2}$', linewidth=2)
# plt.plot(inps, inps**0.5, label='sqrt(x) (expected scaling)', linewidth=2)
plt.legend()
plt.grid()
plt.ylim(0,5)
plt.xlabel('$C_p^2$')
plt.ylabel("$energy / C_p$")


# The minimum energy should be proportional to $c_p$. I think this means the limit of $P(p)$ is in fact $\sqrt{2 log 2}$.
# 
# This ended up being true, because of the relationship to the random energy model.

# ## Comparison with Montanari for Max 2XOR, Max 3XOR

# This also roughly matches the calculation in https://arxiv.org/pdf/2009.11481.pdf that does Max 2XOR and Max 3XOR. They get
# $$
# e_2 = 0.763168\pm 0.000002
# $$
# and 
# $$
# e_3 = 0.8132\pm 0.0001
# $$

# Where $e_2$ uses $\xi(s) = s^2/2$ and $e_3$ uses $\xi(s) = s^3/2$.

# In[19]:


for p, o in list(zip(ps, outs_scaled))[:2]:
    print("my result, p:", p, "e_p:", o * 2**-0.5)


# /The agreement gives me confidence that my approach is correct.

# Note: This matches Table 1 in the original Parisi paper! (k=0 -> 0.798, k=1 -> 0.7652, k=2 -> 0.7636)

# One way to improve my precision is to explicitly solve for the derivative, and use it in the optimization procedure (as the Montanari paper does).

# ## Using the Parisi constants in Sen's Max XOR upper bound

# I insert these values for Subhabrata Sen's bounds for Max XOR on hypergraphs, as listed here: https://doi.org/10.1002/rsa.20774
# 
# The satisfying fraction for Max CUT on p-uniform hypergraphs is to first order in D: $\frac{1}{2}+ \frac{P_p \sqrt{p}}{2}\frac{1}{\sqrt{D}}$

# This makes the satisfying fraction $\frac{1}{2} + \frac{C_p}{\sqrt{D}}$, where $C_p$ is listed below:

# In[20]:


for p, x in zip(ps, outs_scaled):
    print(p, x * p**0.5 / 2)


# Because of the limit, we expect a large $p$XORSAT problem to have satisfying fraction at most
# 
# $$\frac{1}{2} + \frac{\sqrt{2 log 2}}{2} \sqrt{\frac{p}{D}} = \frac{1}{2} + \sqrt{\frac{p log 2}{2}} \frac{1}{\sqrt{D}} \approx \frac{1}{2} + 0.58871\sqrt{\frac{p}{D}}$$

# ## Bonus: Evaluating the $p$-SAT approximations

# A similar formula to Sen has been done by Panchenko for $p$-SAT: https://arxiv.org/pdf/1608.06256.pdf

# Given $N$ variables and $\alpha N$ clauses, the satisfying fraction is 
# $$1-\frac{1}{2^p} + \frac{B_p}{2^p}\frac{1}{\sqrt{\alpha}}$$

# Where $B_p$ is the limit of a Parisi formula with $\xi(x) = (1+x)^p - 1$.

# In[21]:


# set parameters
# k is number of jumps
k=2
# if this range is too small, it fails at higher p
INPS = np.linspace(-30, 30, 400)
PDF_INPS = stats.norm.pdf(INPS)
PDF_INPS = PDF_INPS/np.sum(PDF_INPS)
assert np.allclose(sum(PDF_INPS),1), sum(PDF_INPS)
grid = np.meshgrid(*[INPS]*(k+1), indexing='ij')


# In[22]:


ksat_ps = range(3, 10)

# CONST_SQ may affect my convergence.
CONST_SQ = 0.5

def run_ksat(P):
#     xi = lambda x: CONST_SQ* (-1 + (1+x)**P)
    xiprime = lambda x: CONST_SQ* P * ((1+x)**(P-1))
    xiprimeprime = lambda x:CONST_SQ* P * (P-1) * ((1+x)**(P-2))
    opt = minimize(make_test(xiprime, xiprimeprime, k, PDF_INPS), 
                       [np.random.random()/k for _ in range(2*k)], 
                       method='Powell', 
                       options={"xtol": 1e-10, "ftol":1e-14}
                      )
    print("p:", P, opt.fun)
    return {"x": opt.x, "fun": opt.fun}


# In[23]:


get_ipython().run_cell_magic('time', '', '\nprint("kSAT:")\nr_ksat = []\nfor P in ksat_ps:\n    r_ksat.append(run_ksat(P))\n# pool = multiprocessing.Pool(processes=n_proc)\n# r_ksat = pool.map(run_ksat, ksat_ps)\n# pool.close()\n')


# In[31]:


num_vert_plots = int(np.ceil(len(ksat_ps)/5))
fig, axs = plt.subplots(num_vert_plots, 2, figsize=(12, 4*num_vert_plots))

for idx in range(len(r_ksat)):
    P = ksat_ps[idx]
    if idx % 5 == 0:
        ax = axs[idx // 10, (idx % 10)//5]
        ax.grid()

    qs = np.array([0,*[sum(r[idx]['x'][:k][:i+1]) for i in range(k)]])
    ms = np.array([0,*[sum(r[idx]['x'][k:][:i+1]) for i in range(k)]])
    ax.step(qs, ms, where='post', label="p=" + str(P))
    if idx % 5 == 4:
        ax.legend()

ax.legend()


# The constants after reducing by CONST_SQ (to help with convergence):

# In[32]:


outs = np.array([i['fun'] for i in r_ksat])
ksats_scaled = outs * CONST_SQ**-0.5


# In[33]:


for p,o in zip(ksat_ps, ksats_scaled):
    print(p, o)


# I calculate $C$, where the satisfying fraction is $1-1/2^p + C/\sqrt{\alpha}$:

# In[34]:


for p,o in zip(ksat_ps, ksats_scaled):
    print(p, o*2**(-p))


# It would be nice to get a confirmation on this constant, but I haven't seen it calculated anywhere.

# In[35]:


print("Ending time:", time.ctime())