import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import _lib.pr_func as pr
from _lib.utility import *
from scipy.interpolate import InterpolatedUnivariateSpline
%config InlineBackend.figure_format = 'retina'
sns.set_style("whitegrid")
#import sys
#sys.path.append('..')
The Legendre transform of a function $f:x\mapsto f(x)$ is given by a function $g:u\mapsto g(u)$, such that $\pm\frac{dg}{du}(u) = x(u)$. $g$ can be obtained by treating $x$ as a function of $u$ and defining $$ u := \frac{df}{dx}, \quad g(u) := \pm (u\, x(u) - f(x(u))) $$ since then automatically $\pm \frac{dg}{du} = x(u) + u \frac{dx}{du} - \frac{df}{dx} \frac{dx}{du} = x(u)$. Similarly, if $g$ is given, then $f$ can be reconstructed by $$ x := \pm \frac{dg}{du}, \quad f(x) := x \, u(x) \mp g(u(x)) \, . $$
The Legendre transform can be applied to single-task decision-making with utility function $U$ and fixed prior $p_0$. If we consider the equilibrium Free Energy as a function of $\tau := \frac{1}{\beta}$, $$ F(\tau) = \tau \log Z_\tau = \tau \log \sum_a p_0(a) e^{U(a)/\tau} \, , $$ then $F$ is the Legendre transform of the expected utility $\bar U := \mathbb E[U]$ as a function of $D_{KL}(p(a)\|p_0(a))$. This can be seen by explicitly differentiating $F$: We have
$$ \frac{dF}{d\tau} = \log Z_\tau + \frac{\tau}{Z_\tau} \sum_a p_0(a) e^{U(a)/\tau} \frac{-U(a)}{\tau^2} = \log Z_\tau -\frac{\bar U}{\tau} = \frac{1}{\tau} ( F(\tau) - \bar U) , $$and therefore, $$ I := - \frac{dF}{d\tau} = \frac{1}{\tau} (\bar U - F(\tau)) = \frac{\bar U}{\tau} - \log Z_\tau = D_{KL}(p(a)\|p_0(a)) \, . $$ and $$ f(I) = \tau I + F(\tau) = \bar U $$ Hence $F:\tau \mapsto F(\tau)$ is the Legendre transform of $\bar U: I\mapsto \bar U(I)$ and it follows that $\frac{d\bar U}{dI} = \tau = \frac{1}{\beta}$.
__Note:___ The same calculation works for multi-task decision-making, i.e. when $U$ is a function of $w$ and $a$. Here, the equilibrium Free Energy is given by $$ \frac{1}{\beta} \sum_w \rho(w) \log Z(w) . $$
In the limit case of a perfectly optimal prior, $p_0$ is replaced by $p(a) = \sum_w p_\beta(a|w) \rho(w)$, in particular, there might be a different prior for each $\beta$. The above derivation cannot be followed since there is no closed from expression for the equilibrium Free Energy in terms of $\beta$ anymore. In particular, by taking the derivative of $F$ with respect to $\tau$ as in the derivation above, the $\tau$-dependency of the prior would be neglected.
def RD(U,pw,beta):
pa = pr.func(vars=['a'], val='unif').normalize()
test = 0
for i in range(0,1000):
pagw = (pa*pr.exp(beta*U)).normalize(['a'])
pa = pr.sum(['w'],pagw*pw)
if np.linalg.norm(test-pagw.val)<1e-10:
break
test= pagw.val
EU = pr.sum(U*pagw*pw)
IWA = pr.sum(pw*pagw*pr.log(pagw/pa))
return EU.val, IWA.val
def nonRD(U,pw,beta):
pa = pr.func(vars=['a'], val='unif').normalize()
pagw = (pa*pr.exp(beta*U)).normalize(['a'])
EU = pr.sum(U*pagw*pw)
DKL = pr.sum(pw*pagw*pr.log(pagw/pa))
return EU.val, DKL.val
# setup
N,K = 20,20
pr.set_dims([('w',N),('a',K)])
U = pr.func(val=utility(N,K,6.0),vars=['w','a'])
# U = pr.func(val=np.eye(N),vars=['w','a'])
pw = pr.func(vars=['w'],val='unif').normalize()
# RD curve
EUs = []
Is = []
betas = np.linspace(0.3,5,1000)
for beta in betas:
EU,I = RD(U,pw,beta)
EUs.append(EU)
Is.append(I)
# nonRD curve
EUs_non = []
Is_non = []
for beta in betas:
EU,I = nonRD(U,pw,beta)
EUs_non.append(EU)
Is_non.append(I)
with plt.rc_context({"figure.figsize": (16,8)}):
plt.title("Rate distortion curves")
plt.plot(Is,EUs,label="optimal prior")
plt.plot(Is_non,EUs_non, label="non-optimal prior")
plt.legend()
plt.show()
# calc derivative of EU wrt I
f_non = InterpolatedUnivariateSpline(Is_non, EUs_non, k=5)
dfdx_non = f_non.derivative()
dUdx_non = dfdx_non(Is_non)
# compare against 1/beta
with plt.rc_context({"figure.figsize": (16,8)}):
plt.title("Comparison of ${dU}/{dI}$ and ${1}/{\\beta}$ -- non-optimal prior")
plt.plot(Is_non,1.0/betas,label="1/beta",linestyle="-",color="xkcd:silver")
plt.plot(Is_non,dUdx_non,label="dU/dI",linestyle=":",color="xkcd:denim blue")
plt.legend()
plt.show()
print "||dU/dI - 1/beta|| = {}".format(np.linalg.norm(dUdx_non-1.0/betas))
||dU/dI - 1/beta|| = 2.44547712642e-07
# calc derivative of EU wrt I
f = InterpolatedUnivariateSpline(Is, EUs, k=5)
dfdx = f.derivative()
dUdx = dfdx(Is)
# compare against 1/beta
with plt.rc_context({"figure.figsize": (16,8)}):
plt.title("Comparison of ${dU}/{dI}$ and ${1}/{\\beta}$ -- optimal prior")
plt.plot(Is,1.0/betas,label="1/beta",linestyle="-",color="xkcd:silver")
plt.plot(Is,dUdx,label="dU/dI",linestyle=":",color="xkcd:denim blue")
plt.legend()
plt.show()
print "||dU/dI - 1/beta|| = {}".format(np.linalg.norm(dUdx-1.0/betas))
||dU/dI - 1/beta|| = 13.0328221758
# zommed in
with plt.rc_context({"figure.figsize": (16,8)}):
plt.title("Comparison of ${dU}/{dI}$ and ${1}/{\\beta}$ -- optimal prior, zoomed in")
plt.scatter(Is,1.0/betas,label="1/beta",marker='.',color="xkcd:silver")
plt.scatter(Is,dUdx,label="dU/dI",marker='.',color="xkcd:denim blue")
plt.xlim([0,0.01])
plt.legend()
plt.show()