#!/usr/bin/env python
# coding: utf-8

# # Faster Computations with Numba

# ## Some notes mostly for myself, but could be useful to you

# Altough Python is fast compared to other high-level languages, it still is not as fast as C, C++ or Fortran. Luckily, two open source projects [Numba](http://numba.pydata.org/) and [Cython](http://cython.org/) can be used to speed-up computations. [Numba](http://numba.pydata.org/) is sponsored by the producer of [Anaconda](https://store.continuum.io/cshop/anaconda/), [Continuum Analytics](http://continuum.io/). Both projects allow you to convert your code to interpreted language so that it runs faster. Here I will use Numba, given its ease and Just-In-Time nature, although I still have to figure out how to save and use the compiled functions (newer versions of [Numba](http://numba.pydata.org/) seem to have introduced [Ahead-of-Time](https://numba.pydata.org/numba-doc/dev/user/pycc.html) compilation using pycc and static libraries it seems.). Cython on the other hand needs you to understand much more of C and Ctypes. But once you figure out a way to run your code and compile it, you have a library ready to use and import. See the code for the repository for my paper [Optimal consumption under uncertainty, liquidity constraints, and bounded rationality](http://ozak.github.io/BoundedConsumption/) where I used Cython for the dynsysf.pyx function.

# # Numba

# Numba is quite easy to use. Start by importing it
# 
#     import numba as nb
# 
# or some of its functions
# 
#     from numba import jit, autojit, njit
#     
# Then define your function and notate it using the Numba commands jit, njit, autojit or their decorators @jit, @njit.

# In[1]:


from numba import jit, njit, autojit, jitclass
import numba as nb
import math
import warnings

with warnings.catch_warnings():
    warnings.simplefilter('ignore', nb.errors.NumbaDeprecationWarning)


# ## Examples:

# In[2]:


# A simple function that computes the maximum distance between two vectors
@nb.jit('f8(f8[:],f8[:])')
def errnb(V0,V1):
    maxi=0.0
    for i in range(V0.shape[0]):
        m = abs(V1[i]-V0[i])
        if m>=maxi:
            maxi = m
    return maxi

x = np.random.random((1000))
y = np.random.random((1000))

get_ipython().run_line_magic('timeit', 'errnb(x,y)')
get_ipython().run_line_magic('timeit', 'np.max(np.abs(x-y))')


# In[3]:


# Let's use Numba to compute a Cobb-Douglas function
@jit
def CobbDouglas(K, L, A, alpha, beta):
    return A * K**alpha * L**beta

K, L, A, alpha, beta = 2, 2, 1, .3, .7

CobbDouglas(K,L,A,alpha,beta)


# Let's time it and compare with Numpy or simple Python

# In[4]:


get_ipython().run_line_magic('timeit', 'A * K**alpha * L**beta')


# In[5]:


get_ipython().run_line_magic('timeit', 'CobbDouglas(K,L,A,alpha,beta)')


# Well not fast enough...why?

# In[6]:


CobbDouglas.inspect_types()


# OK...it is correctly compiled and it is using Numba types and not PyObjects. So perhaps we cannot gain much in this simple computation...but that is ok. Let's try something a little more complex, computing a CobbDouglas for vectors K and L

# In[7]:


# Python function
def CobbDouglas(K, L, A, alpha, beta):
    return A * K**alpha * L**beta

CobbDouglas_nb = jit(CobbDouglas)
CobbDouglas_nb2 = njit(CobbDouglas)


# In[8]:


K = np.random.random((100,1))
L = np.random.random((100,1))


# In[9]:


get_ipython().run_line_magic('timeit', 'CobbDouglas(K,L,A,alpha,beta)')


# In[10]:


get_ipython().run_line_magic('timeit', 'CobbDouglas_nb(K,L,A,alpha,beta)')


# In[11]:


get_ipython().run_line_magic('timeit', 'CobbDouglas_nb2(K,L,A,alpha,beta)')


# Ooops what went wrong? Up to V.0.12.2 Numba could not create arrays in ``njit`` mode, i.e. without using Python Objects. But things seem to have improved a lot since then. But even for the old days, we do not need to despair, we can create a fast CobbDouglas function by iterating over our vectors. Here I first create the `CobbDouglasNB` function which takes floating point numbers and tells Numba not to use PyObjects, and then I create the vectorized version `CobbDouglasVecNB`, which iterates over the elements of K and L (here we assume both are vectors) and returns our output Y. I know it is strange that we have to give the output as part of the function, but it allows us to speed up the computations.

# In[12]:


@nb.njit('f8(f8, f8, f8, f8, f8)')
def CobbDouglasNB(K, L, A, alpha, beta):
    return A * K**alpha * L**beta

@nb.jit('f8[:](f8[:], f8[:], f8[:], f8, f8, f8)')
def CobbDouglasVecNB(Y, K, L, A, alpha, beta):
    for i in range(K.shape[0]):
        Y[i]=CobbDouglasNB(K[i],L[i],A,alpha,beta)
    return Y


# In[13]:


K.mean()


# In[14]:


get_ipython().run_line_magic('timeit', 'CobbDouglas(K[:,0],L[:,0],A,alpha,beta)')


# In[15]:


Y=np.zeros_like(K)
get_ipython().run_line_magic('timeit', 'CobbDouglasVecNB(Y[:,0],K[:,0],L[:,0],A,alpha,beta)')


# ## Almost twice as fast as Numpy!

# Let's generalize the function to arrays of 2 dimensions

# In[16]:


@nb.jit('f8[:,:](f8[:,:], f8[:,:], f8[:,:], f8, f8, f8)')
def CobbDouglasVecNB(Y, K, L, A, alpha, beta):
    for i in range(K.shape[0]):
        for j in range(K.shape[1]):
            Y[i,j]=CobbDouglasNB(K[i,j], L[i,j], A, alpha, beta)
    return Y


# In[17]:


K=np.random.random((1000,1000))
L=np.random.random((1000,1000))
get_ipython().run_line_magic('timeit', 'CobbDouglas(K,L,A,alpha,beta)')


# In[18]:


Y=np.zeros_like(K)
get_ipython().run_line_magic('timeit', 'CobbDouglasVecNB(Y,K,L,A,alpha,beta)')


# ### 20% faster than Numpy...so we get the idea.

# While this is great to speed up, we see that there are some issues one has to think about in order to get those gains.
# 
# Now let's use the vectorize option, which creates Numpy Ufunctions, that is functions that operate on vectors (or at leats that s what I gather).

# In[19]:


@nb.vectorize('f8(f8, f8, f8, f8, f8,)')
def CobbDouglasNB2(K, L, A, alpha, beta):
    return A * K**alpha * L**beta


# In[20]:


get_ipython().run_line_magic('timeit', 'CobbDouglasNB2(K,L,A,alpha,beta)')


# Let's compare these functions on different sizes of matrices $K$ and $L$

# In[21]:


K = np.random.random((10000, 10000))
L = np.random.random((10000, 10000))
Y = np.zeros_like(K)


# In[22]:


get_ipython().run_line_magic('timeit', 'CobbDouglas(K[1:100,1:100],L[1:100,1:100],A,alpha,beta)')


# In[23]:


get_ipython().run_line_magic('timeit', 'CobbDouglasVecNB(Y[1:100,1:100],K[1:100,1:100],L[1:100,1:100],A,alpha,beta)')


# In[24]:


get_ipython().run_line_magic('timeit', 'CobbDouglasNB2(K[1:100,1:100],L[1:100,1:100],A,alpha,beta)')


# In[25]:


get_ipython().run_line_magic('timeit', 'CobbDouglas(K[1:1000,1:1000],L[1:1000,1:1000],A,alpha,beta)')


# In[26]:


get_ipython().run_line_magic('timeit', 'CobbDouglasVecNB(Y[1:1000,1:1000],K[1:1000,1:1000],L[1:1000,1:1000],A,alpha,beta)')


# In[27]:


get_ipython().run_line_magic('timeit', 'CobbDouglasNB2(K[1:1000,1:1000],L[1:1000,1:1000],A,alpha,beta)')


# Now let's try the CRRA utility function

# In[28]:


@nb.jit('f8(f8, f8)')
def U_numba(c, sigma):
    '''This function returns the value of utility when the CRRA
    coefficient is sigma. I.e. 
    u(c,sigma)=(c**(1-sigma)-1)/(1-sigma) if sigma!=1 
    and 
    u(c,sigma)=ln(c) if sigma==1
    Usage: u(c,sigma)
    '''
    if sigma!=1:
        u = (c**(1-sigma)-1)/(1-sigma)
    else:
        u = math.log(c)
    return u


# In[29]:


@nb.jit('f8[:](f8[:], f8[:], f8)')
def Uvec(u,c,sigma):
    if sigma!=1:
        for i in range(c.shape[0]):
            u[i] = U_numba(c[i], sigma)
    else:
        u = np.log(c)
    return u


# In[30]:


def Unp(c,sigma):
    if sigma!=1:
        u=(c**(1-sigma)-1)/(1-sigma)
    else:
        u=np.log(c)
    return u


# In[31]:


@nb.vectorize('f8(f8,f8)')
def Unb(c,sigma):
    if sigma!=1:
        u = (c**(1-sigma)-1)/(1-sigma)
    else:
        u = math.log(c)
    return u


# In[32]:


@nb.jit('f8[:](f8[:], f8)')
def U(c, sigma):
    if sigma!=1:
        u = Unb(c,sigma)#(c**(1-sigma)-1)/(1-sigma)
    else:
        u = np.log(c)
    return u


# In[33]:


# Grid of values for state variable over which function will be approximated
gridmin, gridmax, gridsize = 0.1, 5, 300
grid = np.linspace(gridmin, gridmax**1e-1, gridsize)**10


# In[34]:


get_ipython().run_line_magic('timeit', 'U(grid,1)')


# In[35]:


get_ipython().run_line_magic('timeit', 'Unp(grid,1)')


# In[36]:


get_ipython().run_line_magic('timeit', 'Unb(grid,1)')


# In[37]:


u = np.zeros_like(grid)
get_ipython().run_line_magic('timeit', 'Uvec(u, grid, 1)')


# In[38]:


get_ipython().run_line_magic('timeit', 'U(grid,3)')


# In[39]:


get_ipython().run_line_magic('timeit', 'Unb(grid,3)')


# In[40]:


get_ipython().run_line_magic('timeit', 'Unp(grid,3)')


# In[41]:


u = np.zeros_like(grid)
get_ipython().run_line_magic('timeit', 'Uvec(u,grid,3)')


# # Optimizing the code for Dynamic Programming

# ## Optimal Growth

# In[42]:


# Parameters Optimal Growth
alpha = .3
beta = .9
sigma = 1
delta = 1
A = 1


# In[43]:


# Grid of values for state variable over which function will be approximated
gridmin, gridmax, gridsize = 0.1, 5, 300
grid = np.linspace(gridmin, gridmax**1e-1, gridsize)**10

# Parameters for the optimization procedures
count=0
maxiter=1000
tol=1e-6
print('tol=%f' % tol)
print(grid.shape)


# In[44]:


import numba as nb
from scipy.optimize import fminbound
from scipy import interp

# Auxiliary functions 
# Maximize function V on interval [a,b]
def maximum(V, a, b, args=[]):
    return float(V(fminbound(lambda x: -V(x), a, b,args=args)))

# Return Maximizer of function V on interval [a,b]
def maximizer(V, a, b, args=[]):
    return float(fminbound(lambda x: -V(x), a, b, args=args))

# Interpolation functions Class
class LinInterp:
    "Provides linear interpolation in one dimension."

    def __init__(self, X, Y):
        """Parameters: X and Y are sequences or arrays
        containing the (x,y) interpolation points.
        """
        self.X, self.Y = X, Y

    def __call__(self, z):
        """Parameters: z is a number, sequence or array.
        This method makes an instance f of LinInterp callable,
        so f(z) returns the interpolation value(s) at z.
        """
        if isinstance(z, int) or isinstance(z, float):
            return interp ([z], self.X, self.Y)[0]
        else:
            return interp(z, self.X, self.Y)


# In[45]:


@nb.jit('f8[:](f8[:], f8)')
def U(c,sigma):
    if sigma!=1:
        u = Unb(c,sigma)
    else:
        u = np.log(c)
    return u

@nb.vectorize('f8(f8,f8)')
def Unb(c,sigma):
    if sigma!=1:
        u = (c**(1-sigma)-1)/(1-sigma)
    else:
        u = math.log(c)
    return u


# In[46]:


@nb.vectorize('f8(f8, f8, f8, f8, f8,)')
def CobbDouglasNB2(K, L, A, alpha, beta):
    '''CobbDouglasNB2(K, L, A, alpha, beta)'''
    return A * K**alpha * L**beta

@nb.vectorize('f8(f8, f8, f8)')
def F_nb(K, alpha, A):
    '''
    Cobb-Douglas production function
    F(K)=A* K^alpha
    '''
    return A * K**alpha

def F_np(K, alpha, A):
    '''
    Cobb-Douglas production function
    F(K)=A* K^alpha
    '''
    return A * K**alpha


# In[47]:


get_ipython().run_line_magic('timeit', 'CobbDouglasNB2(grid,1,A,alpha,0)')


# In[48]:


get_ipython().run_line_magic('timeit', 'F_nb(grid,alpha,A)')


# In[49]:


get_ipython().run_line_magic('timeit', 'F_np(grid,alpha,A)')


# In[50]:


V0 = LinInterp(grid,U(grid,sigma))


# In[51]:


def bellman(x,w):
    """The approximate Bellman operator.
    Parameters: w is a LinInterp object (i.e., a 
    callable object which acts pointwise on arrays).
    Returns: An instance of LinInterp that represents the optimal operator.
    w is a function defined on the state space.
    """
    vals = []
    for k in x:
        kmax=F_nb(k,alpha,A)
        h = lambda kp: U_numba(kmax + (1-delta) * k - kp,sigma) + beta * w(kp)
        vals.append(maximum(h, 0, kmax))
    return LinInterp(grid, vals)


# In[52]:


def policy(x,w):
    """
    For each function w, policy(w) returns the function that maximizes the 
    RHS of the Bellman operator.
    Replace w for the Value function to get optimal policy.
    The approximate optimal policy operator w-greedy (See Stachurski (2009)). 
    Parameters: w is a LinInterp object (i.e., a 
    callable object which acts pointwise on arrays).
    Returns: An instance of LinInterp that captures the optimal policy.
    """
    vals = []
    for k in x:
        kmax=F_nb(k,alpha,A)
        h = lambda kp: U_numba(kmax + (1-delta) * k - kp, sigma) + beta * w(kp)
        vals.append(maximizer(h, 0, kmax))
    return LinInterp(grid, vals)


# In[53]:


def solve():
    count=0
    V0=LinInterp(grid,U(grid,sigma))
    while count<maxiter:
        V1 = bellman(grid,V0)
        err = np.max(np.abs(np.array(V1(grid))-np.array(V0(grid))))
        V0 = V1
        count += 1
        if err<tol:
            print(count)
            break
    return V0


# In[54]:


V0 = bellman(grid, V0)
C0 = policy(grid, V0)


# In[55]:


get_ipython().run_line_magic('timeit', 'bellman(grid, V0)')
get_ipython().run_line_magic('timeit', 'policy(grid ,V0)')
get_ipython().run_line_magic('timeit', 'solve()')


# In[56]:


def Utrf(kp,kmax,k,sigma,w):
    return -U_numba(kmax + (1-delta) * k - kp,sigma)-beta*w(kp)

@nb.jit('f8[:](f8[:], f8[:])')
def bellmannb(x,w0):
    """The approximate Bellman operator.
    Parameters: w is a LinInterp object (i.e., a 
    callable object which acts pointwise on arrays).
    Returns: An instance of LinInterp that represents the optimal operator.
    w is a function defined on the state space.
    """
    w = LinInterp(x,w0)
    vals = np.array([])
    for k in x:
        kmax = F_nb(k,alpha,A)
        vals = np.append(vals, -Utrf(fminbound(Utrf, 0, kmax, args=[kmax,k,sigma,w]),kmax,k,sigma,w))
    return vals

@nb.jit('f8[:](f8[:],f8[:])')
def policynb(x,w0):
    """The approximate Bellman operator.
    Parameters: w is a LinInterp object (i.e., a 
    callable object which acts pointwise on arrays).
    Returns: An instance of LinInterp that represents the optimal operator.
    w is a function defined on the state space.
    """
    w = LinInterp(x,w0)
    vals = np.array([])
    for k in x:
        kmax = F_nb(k,alpha,A)
        vals = np.append(vals,fminbound(Utrf, 0, kmax, args=[kmax,k,sigma,w]))
    return vals

@nb.jit('f8(f8[:],f8[:])')
def errnb(V0,V1):
    maxi=0.0
    for i in range(V0.shape[0]):
        m=abs(V1[i]-V0[i])
        if m>=maxi:
            maxi=m
    return maxi

@nb.jit('f8[:]()')
def solvenb():
    count=0.0
    V0=U(grid,sigma)
    while count<maxiter:
        V1=bellmannb(grid,V0)
        err=errnb(V1,V0)
        V0=V1
        count+=1
        if err<tol:
            print(count)
            break
    return V0


# In[57]:


U0 = U(grid, sigma)

U0 = bellmannb(grid, U0)

c0 = policynb(grid, U0)


# In[58]:


get_ipython().run_line_magic('timeit', 'bellmannb(grid, U0)')
get_ipython().run_line_magic('timeit', 'policynb(grid, U0)')
get_ipython().run_line_magic('timeit', 'solvenb()')


# In[59]:


plt.plot(grid, V0(grid), label='V0')
plt.plot(grid, U0)
plt.legend()


# In[60]:


plt.plot(grid, C0(grid), label='C0')
plt.plot(grid, c0)
plt.legend()


# In[61]:


@nb.jit('f8(f8[:], f8[:])')
def errnb(V0, V1):
    maxi=0.0
    for i in range(V0.shape[0]):
        m=abs(V1[i]-V0[i])
        if m>=maxi:
            maxi=m
    return maxi

@nb.jit('f8[:]()')
def solvenb():
    count=0.0
    V0 = U(grid,sigma)
    while count<maxiter:
        V1 = bellmannb(grid, V0)
        err = errnb(V1, V0)
        V0 = V1
        count += 1
        if err<tol:
            print(count)
            break
    return V0
get_ipython().run_line_magic('timeit', 'solvenb()')