Miranda and Fackler, Applied Computational Economics and Finance, 2002, Section 7.6.3
%matplotlib inline
from __future__ import division, print_function
import numpy as np
import matplotlib.pyplot as plt
import quantecon as qe
from quantecon.markov import DiscreteDP
# Model parameters
maxage = 5    # Maximum asset age
repcost = 75  # Replacement cost
mancost = 10  # Maintenance (service) cost
beta = 0.9    # Discount factor
m = 3  # Number of actions; 0: keep, 1: service, 2: replace

# Construct the two-dimensional state space:
# (age of asset, number of services performed)
ages = np.arange(1, maxage+1)
num_services = np.arange(maxage)
S = qe.cartesian([ages, num_services]).astype(int)
n = len(S)  # Number of states
S
array([[1, 0], [1, 1], [1, 2], [1, 3], [1, 4], [2, 0], [2, 1], [2, 2], [2, 3], [2, 4], [3, 0], [3, 1], [3, 2], [3, 3], [3, 4], [4, 0], [4, 1], [4, 2], [4, 3], [4, 4], [5, 0], [5, 1], [5, 2], [5, 3], [5, 4]])
Here, the state space includes states that are never actually reached, because the asset can be serviced at most once per year — namely, those pairs of asset age $a$ and number of services $s$ such that $s \geq a$.
One can alternatively define the state space excluding those states; see below.
# We need a routine to get the index of an age-serv pair
def getindex(age, serv, S):
    """
    Return the row index of the state [age, serv] in the state array S.

    The previous implementation relied on the module-level global `n`
    (rebound later in the file) and assumed exactly `maxage` rows per
    age, which is fragile for state arrays that exclude infeasible
    pairs.  Scanning S directly works for any state array passed in.

    Parameters
    ----------
    age : int
        Age of the asset.
    serv : int
        Number of services performed on the asset.
    S : ndarray(int, ndim=2)
        Array whose rows are [age, serv] state pairs.

    Returns
    -------
    int or None
        Index of [age, serv] in S, or None if the pair is not present
        (matching the original's implicit-None fall-through).
    """
    for i in range(len(S)):
        if S[i, 0] == age and S[i, 1] == serv:
            return i
    return None
# Profit function as a function of the age and the number of services
def p(age, serv, maxage=5):
    """
    Net profit from operating an asset of age `age` that has been
    serviced `serv` times.  Accepts scalars or NumPy arrays.

    The factor (1 - (age - serv)/maxage) degrades output as the asset
    ages net of services; `maxage` (default 5, the model's maximum age)
    generalizes the previously hard-coded horizon while remaining
    backward compatible with existing two-argument calls.
    """
    return (1 - (age - serv)/maxage) * (50 - 2.5 * age - 2.5 * age**2)
# Reward array: one row per state, one column per action
R = np.empty((n, m))
R[:, 0] = p(S[:, 0], S[:, 1])                # 0: keep
R[:, 1] = p(S[:, 0], S[:, 1] + 1) - mancost  # 1: service
R[:, 2] = p(0, 0) - repcost                  # 2: replace
# Keeping or servicing an asset already at the maximal age is infeasible
for serv in range(maxage):
    idx = getindex(maxage, serv, S)
    R[idx, 0] = R[idx, 1] = -np.inf
R
array([[ 36., 35., -25.], [ 45., 44., -25.], [ 54., 53., -25.], [ 63., 62., -25.], [ 72., 71., -25.], [ 21., 18., -25.], [ 28., 25., -25.], [ 35., 32., -25.], [ 42., 39., -25.], [ 49., 46., -25.], [ 8., 2., -25.], [ 12., 6., -25.], [ 16., 10., -25.], [ 20., 14., -25.], [ 24., 18., -25.], [ 0., -10., -25.], [ 0., -10., -25.], [ 0., -10., -25.], [ 0., -10., -25.], [ 0., -10., -25.], [-inf, -inf, -25.], [-inf, -inf, -25.], [-inf, -inf, -25.], [-inf, -inf, -25.], [-inf, -inf, -25.]])
# (Degenerate) transition probability array: each action moves the
# state deterministically
Q = np.zeros((n, m, n))
for i, (age, serv) in enumerate(S):
    # 0: keep -- age advances (capped at maxage), services unchanged
    Q[i, 0, getindex(min(age+1, maxage), serv, S)] = 1
    # 1: service -- age and service count both advance (capped)
    Q[i, 1, getindex(min(age+1, maxage), min(serv+1, maxage-1), S)] = 1
    # 2: replace -- jump back to a brand-new asset, state (1, 0)
    Q[i, 2, getindex(1, 0, S)] = 1
# Create a DiscreteDP instance with the full (product-form) formulation
ddp = DiscreteDP(R, Q, beta)
# Solve the dynamic optimization problem (by policy iteration, the default)
res = ddp.solve()
# Number of iterations taken by the solver
res.num_iter
3
# Optimal policy: action index (0 keep, 1 service, 2 replace) per state
res.sigma
array([1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
# Optimal actions for reachable states only (those with age > services,
# since an asset can be serviced at most once per year)
for i in range(n):
    if S[i, 0] > S[i, 1]:
        print(S[i], res.sigma[i])
[1 0] 1 [2 0] 0 [2 1] 1 [3 0] 2 [3 1] 0 [3 2] 0 [4 0] 2 [4 1] 2 [4 2] 2 [4 3] 2 [5 0] 2 [5 1] 2 [5 2] 2 [5 3] 2 [5 4] 2
# Simulate the controlled Markov chain, starting from a brand-new asset
start = getindex(1, 0, S)
ts_length = 13
path = res.mc.simulate(ts_length=ts_length, init=start)
# Plot sample paths of the age of asset (0th coordinate of S)
# and the number of services (1st coordinate of S)
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for coord, label in enumerate(['Age of Asset', 'Number of Services']):
    axes[coord].plot(S[path, coord])
    axes[coord].set_xlabel('Year')
    axes[coord].set_ylabel(label)
    axes[coord].set_title('Optimal State Path: ' + label)
axes[0].set_yticks(np.linspace(1, 4, 4, endpoint=True))
axes[1].set_yticks(np.linspace(1, 2, 2, endpoint=True))
axes[1].set_ylim(0, 2.25)
plt.show()
Define the state space excluding the age-serv pairs that are never realized:
from itertools import product
# Number of feasible states: 1 + 2 + ... + maxage (a triangular number)
n = maxage * (maxage+1) // 2
# Keep only the (age, serv) pairs with serv < age, in lexicographic order
S = np.array([pair
              for pair in product(range(1, maxage+1), range(maxage))
              if pair[0] > pair[1]], dtype=int)
S
array([[1, 0], [2, 0], [2, 1], [3, 0], [3, 1], [3, 2], [4, 0], [4, 1], [4, 2], [4, 3], [5, 0], [5, 1], [5, 2], [5, 3], [5, 4]])
We follow the state-action pairs formulation approach.
# Reward array
R = np.empty((n, m))
for i, (age, serv) in enumerate(S):
    # np.inf replaces np.infty, which was removed in NumPy 2.0.
    # Keeping or servicing an asset already at maximal age is infeasible.
    R[i, 0] = p(age, serv) if age < maxage else -np.inf
    R[i, 1] = p(age, serv+1) - mancost if age < maxage else -np.inf
    # Replacement is always feasible
    R[i, 2] = p(0, 0) - repcost
R
array([[ 36., 35., -25.], [ 21., 18., -25.], [ 28., 25., -25.], [ 8., 2., -25.], [ 12., 6., -25.], [ 16., 10., -25.], [ 0., -10., -25.], [ 0., -10., -25.], [ 0., -10., -25.], [ 0., -10., -25.], [-inf, -inf, -25.], [-inf, -inf, -25.], [-inf, -inf, -25.], [-inf, -inf, -25.], [-inf, -inf, -25.]])
# Indices of the feasible state-action pairs (those with finite reward);
# np.inf replaces np.infty, which was removed in NumPy 2.0
s_indices, a_indices = np.where(R > -np.inf)
R = R[s_indices, a_indices]
R
array([ 36., 35., -25., 21., 18., -25., 28., 25., -25., 8., 2., -25., 12., 6., -25., 16., 10., -25., 0., -10., -25., 0., -10., -25., 0., -10., -25., 0., -10., -25., -25., -25., -25., -25., -25.])
# Number of feasible state-action pairs (length of the flattened R)
L = len(R)
# (Degenerate) transition probability array over state-action pairs
Q = np.zeros((L, n))
for i, (s, a) in enumerate(zip(s_indices, a_indices)):
    age, serv = S[s]
    if a == 2:
        # replace: back to a brand-new asset
        nxt = (1, 0)
    elif a == 1:
        # service: age and service count both advance (capped)
        nxt = (min(age+1, maxage), min(serv+1, maxage-1))
    else:
        # keep: age advances (capped at maxage), services unchanged
        nxt = (min(age+1, maxage), serv)
    Q[i, getindex(nxt[0], nxt[1], S)] = 1
# Create a DiscreteDP instance in the state-action pairs formulation
ddp = DiscreteDP(R, Q, beta, s_indices, a_indices)
# Solve the dynamic optimization problem (by policy iteration)
res = ddp.solve()
# Number of iterations taken by the solver
res.num_iter
3
# Optimal policy: action index (0 keep, 1 service, 2 replace) per state
res.sigma
array([1, 0, 1, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2])
# Simulate the controlled Markov chain from a brand-new asset
start = getindex(1, 0, S)
ts_length = 13
path = res.mc.simulate(ts_length=ts_length, init=start)
# Plot sample paths of the age of asset (0th coordinate of S)
# and the number of services (1st coordinate of S)
fig, axs = plt.subplots(1, 2, figsize=(12, 4))
labels = ['Age of Asset', 'Number of Services']
for k, label in zip((0, 1), labels):
    axs[k].plot(S[path, k])
    axs[k].set_xlabel('Year')
    axs[k].set_ylabel(label)
    axs[k].set_title('Optimal State Path: ' + label)
axs[0].set_yticks(np.linspace(1, 4, 4, endpoint=True))
axs[1].set_yticks(np.linspace(1, 2, 2, endpoint=True))
axs[1].set_ylim(0, 2.25)
plt.show()