using QuantEcon
using Plots
pyplot()

emax = 8             # Energy carrying capacity
n = emax + 1         # Number of states, 0, ..., emax
m = 3                # Number of areas (actions), 0, ..., m-1
e = [2, 4, 5]        # Energy offerings
p = [1.0, 0.7, 0.8]  # Survival probabilities
q = [0.5, 0.8, 0.7]  # Success probabilities

T = 10               # Time horizon

# Terminal values
v_term = ones(n)
v_term[1]= 0;

L = n * m  # Number of feasible state-action pairs

s_indices = Vector{Int}(L)
for i in 1:n
    s_indices[(i-1) * m + 1 : i * m] = i
end

a_indices = Vector{Int}(L)
for i in 1:n
    a_indices[(i-1) * m + 1 : i * m] = 1:m
end

# Reward vector
R = zeros(L);

# Transition probability array
Q = zeros(L, n)
for (i, s) in enumerate(s_indices)
    k = a_indices[i]
    if s == 1
        Q[i, 1] = 1
    elseif s == 2
        Q[i, minimum([emax+1, s-1+e[k]])] = p[k] * q[k]
        Q[i, 1] = 1 - p[k] * q[k]
    else
        Q[i, minimum([emax+1, s-1+e[k]])] = p[k] * q[k]
        Q[i, s-1] = p[k] * (1 - q[k])
        Q[i, 1] = 1 - p[k]
    end
end

# Discount factor
beta = 0.99999999;

# Create a DiscreteDP
ddp = DiscreteDP(R, Q, beta, s_indices, a_indices);

vs = Matrix(T+1, n)    
vs[T+1, :] = v_term
for t in T+1:-1:2
    vs[t-1, :] = bellman_operator(ddp, vs[t, :])
end

p1 = bar(0:8, vs[1, :], xlabel="Stock of Energy", xticks=0:1:8,
         ylabel="Probability", yticks=0:0.2:1, ylims=(0,1), label="", 
         title="Survivial Probability, Period 0", )
p2 = bar(0:8, vs[6, :], xlabel="Stock of Energy", xticks=0:1:8,
         ylabel="Probability", yticks=0:0.2:1, ylims=(0,1), label="",
         title="Survivial Probability, Period 5", )
plot(p1, p2, layout=(1,2), size=(700,230))