#!/usr/bin/env python
# coding: utf-8

# Optimal Transport with Linear Programming
# =========================================
# 
# *Important:* Please read the [installation page](http://gpeyre.github.io/numerical-tours/installation_python/) for details about how to install the toolboxes.
# $\newcommand{\dotp}[2]{\langle #1, #2 \rangle}$
# $\newcommand{\enscond}[2]{\lbrace #1, #2 \rbrace}$
# $\newcommand{\pd}[2]{ \frac{ \partial #1}{\partial #2} }$
# $\newcommand{\umin}[1]{\underset{#1}{\min}\;}$
# $\newcommand{\umax}[1]{\underset{#1}{\max}\;}$
# $\newcommand{\umin}[1]{\underset{#1}{\min}\;}$
# $\newcommand{\uargmin}[1]{\underset{#1}{argmin}\;}$
# $\newcommand{\norm}[1]{\|#1\|}$
# $\newcommand{\abs}[1]{\left|#1\right|}$
# $\newcommand{\choice}[1]{ \left\{  \begin{array}{l} #1 \end{array} \right. }$
# $\newcommand{\pa}[1]{\left(#1\right)}$
# $\newcommand{\diag}[1]{{diag}\left( #1 \right)}$
# $\newcommand{\qandq}{\quad\text{and}\quad}$
# $\newcommand{\qwhereq}{\quad\text{where}\quad}$
# $\newcommand{\qifq}{ \quad \text{if} \quad }$
# $\newcommand{\qarrq}{ \quad \Longrightarrow \quad }$
# $\newcommand{\ZZ}{\mathbb{Z}}$
# $\newcommand{\CC}{\mathbb{C}}$
# $\newcommand{\RR}{\mathbb{R}}$
# $\newcommand{\EE}{\mathbb{E}}$
# $\newcommand{\Zz}{\mathcal{Z}}$
# $\newcommand{\Ww}{\mathcal{W}}$
# $\newcommand{\Vv}{\mathcal{V}}$
# $\newcommand{\Nn}{\mathcal{N}}$
# $\newcommand{\NN}{\mathcal{N}}$
# $\newcommand{\Hh}{\mathcal{H}}$
# $\newcommand{\Bb}{\mathcal{B}}$
# $\newcommand{\Ee}{\mathcal{E}}$
# $\newcommand{\Cc}{\mathcal{C}}$
# $\newcommand{\Gg}{\mathcal{G}}$
# $\newcommand{\Ss}{\mathcal{S}}$
# $\newcommand{\Pp}{\mathcal{P}}$
# $\newcommand{\Ff}{\mathcal{F}}$
# $\newcommand{\Xx}{\mathcal{X}}$
# $\newcommand{\Mm}{\mathcal{M}}$
# $\newcommand{\Ii}{\mathcal{I}}$
# $\newcommand{\Dd}{\mathcal{D}}$
# $\newcommand{\Ll}{\mathcal{L}}$
# $\newcommand{\Tt}{\mathcal{T}}$
# $\newcommand{\si}{\sigma}$
# $\newcommand{\al}{\alpha}$
# $\newcommand{\la}{\lambda}$
# $\newcommand{\ga}{\gamma}$
# $\newcommand{\Ga}{\Gamma}$
# $\newcommand{\La}{\Lambda}$
# $\newcommand{\si}{\sigma}$
# $\newcommand{\Si}{\Sigma}$
# $\newcommand{\be}{\beta}$
# $\newcommand{\de}{\delta}$
# $\newcommand{\De}{\Delta}$
# $\newcommand{\phi}{\varphi}$
# $\newcommand{\th}{\theta}$
# $\newcommand{\om}{\omega}$
# $\newcommand{\Om}{\Omega}$

# This numerical tours details how to solve the discrete optimal transport
# problem (in the case of measures that are sums of Diracs) using linear
# programming.

# You need to install [CVXPY](https://www.cvxpy.org/). _Warning:_ seems to not be working on Python 3.7, use rather 3.6.

# In[1]:


import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cp


# Optimal Transport of Discrete Distributions
# ------------------------------------------
# 
# We consider two dicretes distributions
# $$ \al = \sum_{i=1}^n a_i \de_{x_i} \qandq 
#    \be = \sum_{j=1}^m b_j \de_{y_j}. $$
#    
# where $n,m$ are the number of points, $\de_x$ is the Dirac at
# location $x \in \RR^d$, and $(x_i)_i, (y_j)_j$ are the positions of the diracs in $\RR^d$. 

# Dimensions $(n,m)$ of the coulds.

# In[2]:


n = 60
m = 80


# Generate the point clouds $X=(x_i)_i$ and $Y=(y_j)_j$.

# In[3]:


gauss = lambda q,a,c: a*np.random.randn(2, q) + np.transpose(np.tile(c, (q,1)))
X = np.random.randn(2,n)*.3
Y = np.hstack((gauss(int(m/2),.5,[0,1.6]),np.hstack((gauss(int(m/4),.3,[-1,-1]),gauss(int(m/4),.3,[1,-1])))))


# Generate the weights $a,b$.

# In[4]:


normalize = lambda a: a/np.sum(a)
a = normalize(np.random.rand(n, 1))
b = normalize(np.random.rand(m, 1))


# Helper function for display of clouds.

# In[5]:


myplot = lambda x,y,ms,col: plt.scatter(x,y, s=ms*20, edgecolors="k", c=col, linewidths=2)


# Display the point clouds.
# The size of each dot is proportional to its probability density weight.

# In[6]:


plt.figure(figsize = (10,7))
plt.axis("off")
for i in range(len(a)):
    myplot(X[0,i], X[1,i], a[i]*len(a)*10, 'b')
for j in range(len(b)):
    myplot(Y[0,j], Y[1,j], b[j]*len(b)*10, 'r')
plt.xlim(np.min(Y[0,:])-.1,np.max(Y[0,:])+.1)
plt.ylim(np.min(Y[1,:])-.1,np.max(Y[1,:])+.1)
plt.show()


# Compute the cost matrix $C_{i,j} := \norm{x_i-x_j}^2$.

# In[7]:


def distmat(x,y):
    return np.sum(x**2,0)[:,None] + np.sum(y**2,0)[None,:] - 2*x.transpose().dot(y)
C = distmat(X,Y)


# Define the optimiztion variable $P$.

# In[8]:


P = cp.Variable((n,m))


# Define the set of discrete couplings between $\al$ and $\be$
# $$ U(a,b) := \enscond{ P \in \RR_+^{n \times m} }{
#      \forall i, \sum_j P_{i,j} = a_i, \:
#      \forall j, \sum_i P_{i,j} = b_j }. $$

# In[9]:


u = np.ones((m,1))
v = np.ones((n,1))
U = [0 <= P, cp.matmul(P,u)==a, cp.matmul(P.T,v)==b]


# The Kantorovitch formulation of the optimal transport reads
# $$ P^\star \in \uargmin{P \in U(a,b)} \sum_{i,j} P_{i,j} C_{i,j}.  $$
# Solve it using CVXPY

# In[10]:


objective = cp.Minimize( cp.sum(cp.multiply(P,C)) )
prob = cp.Problem(objective, U)
result = prob.solve()


# An optimal coupling $P^\star$ can be shown to be a sparse matrix
# with less than $n+m-1$ non zero entries. An entry $P_{i,j}^\star \neq 0$
# should be understood as a link between $x_{i}$
# and $y_{j}$ where an amount of mass equal to $P_{i,j}^\star$ is transfered.

# Check that the number of non-zero entries in $P^\star$ is $n+m-1$. Beware that we are using an interior point method here, so that entries of $P^\star$ are nevery exactly 0.

# In[11]:


print("Number of non-zero: %d (n + m-1 = %d)" %(len(P.value[P.value>1e-5]), n + m-1))


# Display the solution coupling.

# In[12]:


plt.figure(figsize = (5,5))
plt.imshow(P.value);


# Display the connexion defined by the optimal coupling.

# In[13]:


I,J = np.nonzero(P.value>1e-5)
plt.figure(figsize = (10,7))
plt.axis('off')
for k in range(len(I)):
    h = plt.plot(np.hstack((X[0,I[k]],Y[0,J[k]])),np.hstack(([X[1,I[k]], Y[1,J[k]]])),'k', lw = 2)
for i in range(len(a)):
    myplot(X[0,i], X[1,i], a[i]*len(a)*10, 'b')
for j in range(len(b)):
    myplot(Y[0,j], Y[1,j], b[j]*len(b)*10, 'r')
plt.xlim(np.min(Y[0,:])-.1,np.max(Y[0,:])+.1)
plt.ylim(np.min(Y[1,:])-.1,np.max(Y[1,:])+.1)
plt.show()


# Displacement Interpolation
# --------------------------
# For any $t \in [0,1]$, one can define a distribution $\mu_t$ such
# that $t \mapsto \mu_t$ defines a geodesic for the Wasserstein metric.
# 
# 
# Since the $W_2$ distance is a geodesic distance, this geodesic path solves the
# following variational problem
# 
# $$ \mu_t = \uargmin{\mu} (1-t)W_2(\al,\mu)^2 + t W_2(\be,\mu)^2. $$
# This can be understood as a generalization of the usual Euclidean
# barycenter to barycenter of distribution. Indeed, in the case that
# $\al = \de_{x}$ and $\be=\de_y$, one has $\mu_t=\de_{x_t}$ where $ x_t =
# (1-t)x+t y $.
# 
# 
# Once the optimal coupling $P^\star$ has been computed, the
# interpolated distribution is obtained as
# 
# $$ \mu_t = \sum_{i,j} P^\star_{i,j} \de_{(1-t)x_{i} + t y_{j}}. $$

# Find the $i,j$ with non-zero $P_{i,j}^\star$.

# In[14]:


I,J = np.nonzero(P.value>1e-5)
Pij = P.value[I,J]


# Display the evolution of $\mu_t$ for a varying value of $t \in [0,1]$.

# In[15]:


plt.figure(figsize =(15,10))
tlist = np.linspace(0, 1, 6)
for i in range(len(tlist)):
    t = tlist[i]
    Xt = (1-t)*X[:,I] + t*Y[:,J]
    plt.subplot(2,3,i+1)
    plt.axis("off")
    for j in range(len(Pij)):
        myplot(Xt[0,j],Xt[1,j],Pij[j]*len(Pij)*6,[[t,0,1-t]])
    plt.title("t = %.1f" %t) 
    plt.xlim(np.min(Y[0,:])-.1,np.max(Y[0,:])+.1)
    plt.ylim(np.min(Y[1,:])-.1,np.max(Y[1,:])+.1)
plt.show()


# Optimal Assignement
# -------------------
# In the case where $n=m$ and the weights are uniform $a_{i}=1/n, b_{j}=1/n$, one can show that there is at least one optimal transport coupling which is actually a
# permutation matrix. This properties comes from the fact that
# the extremal point of the polytope $U(1,1)$ are permutation matrices.
# 
# 
# This means that there exists an optimal permutation $\si^\star \in \Sigma_n$ such
# that
# $$ P^\star_{i,j} = \choice{
#       1 \qifq j=\si^\star(i), \\
#       0 \quad\text{otherwise}.
#   } $$
#   
# where $\Si_n$ is the set of permutation (bijections) of
# $\{1,\ldots,n\}$.
# 
# 
# This permutation thus solves the so-called optimal assignement problem
# $$ \si^\star \in \uargmin{\si \in \Sigma_n}
#       \sum_{i} C_{i,\si(j)}. $$

# Use the same number of points.

# In[16]:


n = 40
m = n


# Compute points clouds.

# In[24]:


X = np.random.randn(2,n)*.3
Y = np.hstack((gauss(int(m/2),.5,[0,1.6]),np.hstack((gauss(int(m/4),.3,[-1,-1]),gauss(int(m/4),.3,[1,-1])))))


# Constant distributions.

# In[25]:


a = np.ones([n,1])/n
b = a


# Compute the weight matrix $ (C_{i,j})_{i,j}. $

# In[26]:


C = distmat(X,Y)


# Display the coulds.

# In[27]:


plt.figure(figsize = (10,7))
plt.axis('off')
myplot(X[0,:],X[1,:],10,'b')
myplot(Y[0,:],Y[1,:],10,'r')
plt.xlim(np.min(Y[0,:])-.1,np.max(Y[0,:])+.1)
plt.ylim(np.min(Y[1,:])-.1,np.max(Y[1,:])+.1)
plt.show()


# Solve the optimal transport.

# In[28]:


P = cp.Variable((n,m))
u = np.ones((m,1))
v = np.ones((n,1))
U = [0 <= P, cp.matmul(P,u)==a, cp.matmul(P.T,v)==b]
objective = cp.Minimize( cp.sum(cp.multiply(P,C)) )
prob = cp.Problem(objective, U)
result = prob.solve()


# Show that $P$ is a binary permutation matrix.

# In[29]:


plt.figure(figsize = (5,5))
plt.imshow(P.value);


# Display the optimal assignement.

# In[30]:


I,J = np.nonzero(P.value>1e-5)
plt.figure(figsize = (10,7))
plt.axis('off')
for k in range(len(I)):
    h = plt.plot(np.hstack((X[0,I[k]],Y[0,J[k]])),np.hstack(([X[1,I[k]], Y[1,J[k]]])),'k', lw = 2)
myplot(X[0,:], X[1,:], 10, 'b')
myplot(Y[0,:], Y[1,:], 10, 'r')
plt.xlim(np.min(Y[0,:])-.1,np.max(Y[0,:])+.1)
plt.ylim(np.min(Y[1,:])-.1,np.max(Y[1,:])+.1)
plt.show()

<script>
  $(document).ready(function(){
      $('div.prompt').hide();
  });
</script>