#!/usr/bin/env python
# coding: utf-8

# # Bayesian Decision Science
#
# Adapted from Ravin Kumar's PyData Global 2020 talk on Bayesian Decision Science ([YouTube](https://www.youtube.com/watch?v=WT7o8QRIiQI), [GitHub](https://github.com/canyon289/PyDataGlobal_2020)), updated for `pymc` v4.0 with some notes on learning the new syntax.
#
# The original talk was done with `pymc3`. Since then, `pymc3` was rebranded as `pymc`, and the recent version 4.0 introduced some big changes.

# In[1]:

import matplotlib.pyplot as plt
from scipy import stats, optimize
import numpy as np
import pandas as pd
# np.set_printoptions(precision=2, floatmode="fixed")
import arviz as az

get_ipython().run_line_magic('config', "InlineBackend.figure_format='retina'")

# Note the new way of importing `pymc`:

# In[2]:

import pymc as pm

print(f"Running on PyMC v{pm.__version__}")

# ## Newsvendor problem with fixed demand
#
# The next few cells follow Ravin's example of a newsvendor problem. In the first section he discusses a very simple case where demand is fixed, so you can write the problem out directly in code:

# In[3]:

newspaper_cost = 5
customer_price = 7
anishas_inventory = 42
demand = 40

profit = demand*customer_price - newspaper_cost*anishas_inventory
f"Anisha makes ${profit} in profit"

# The next point is that you can use `numpy` and turn this into a function:

# In[4]:

def daily_profit(inventory, demand, newsvendor_cost=5, customer_price=7):
    """Calculate the profit for a single day given inventory and demand."""
    # You can only sell as many papers as customers demand, but you pay for every paper stocked
    return customer_price*np.min([inventory, demand]) - newsvendor_cost*inventory

# In[5]:

f"Decision 1: profit ${daily_profit(42, 40)}, Decision 2: profit ${daily_profit(38, 40)}"

# ### Optimization with `scipy.optimize`
#
# He then demonstrates that you *could* just plot profit for a whole range of inventory values and graphically inspect where it is maximized, but we can also turn this into an optimization problem using `scipy.optimize`.

# In[6]:

def objective(inventory, demands):
    """Return the negative total profit over an iterable of daily demands."""
    # Negate the reward so that maximizing profit becomes a minimization problem
    return -np.sum([daily_profit(inventory, d) for d in demands])

# In[7]:

# Let scipy minimize the objective function (negative profit) when demand is fixed at 40
# for two days. Note the trailing comma: args must be a tuple whose single element is the
# list of demands, otherwise the list itself gets unpacked into separate arguments.
opt_stoch = optimize.minimize_scalar(objective, bounds=(0, np.inf), args=([40, 40],))
opt_stoch

# Great, so we have an optimal objective value of -160 (i.e. \$160 in profit over the two days), and the optimizer found the optimal inventory of 40. That makes sense: if demand is exactly 40, you maximize profit by buying and selling exactly that many newspapers.

# ## What if demand is uncertain
#
# But real life is a little more complicated, so what if we simulate random demand?
#
# Here, we simulate demand drawn from a normal distribution: $\mathcal{N}(40, 20)$.

# In[8]:

np.random.seed(seed=0)
demand = np.round(stats.norm(40, 20).rvs(15))
demand_seen, demand_unseen = demand[:5], demand[5:]

# In[9]:

f"The first five days of demand we have seen: {demand_seen}"

# In[10]:

f"The UNSEEN next ten days of demand: {demand_unseen}"

# ## Instability in the sample mean and std deviation
#
# He then takes the sample mean and standard deviation of the 5 observed days of demand and plugs them into the critical fractile equation to calculate the optimal inventory strategy.
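#
# To make that step concrete, here is a minimal sketch of the critical fractile calculation (my addition, not one of the original cells). The standard newsvendor result is that the optimal inventory is the $C_u/(C_u + C_o)$ quantile of the demand distribution, where the underage cost $C_u = 7 - 5 = 2$ is the profit forgone per missed sale and the overage cost $C_o = 5$ is the loss per unsold paper:

# In[ ]:

# Sketch (not from the talk): critical fractile with plug-in sample estimates
underage = 7 - 5   # profit forgone per missed sale
overage = 5        # loss per unsold newspaper
critical_ratio = underage / (underage + overage)  # = 2/7 ~ 0.286

# Plug the 5-day sample mean and std into a normal demand model
naive_inventory = stats.norm(demand_seen.mean(), demand_seen.std()).ppf(critical_ratio)
f"Critical ratio {critical_ratio:.3f} -> plug-in optimal inventory ~ {naive_inventory:.0f} newspapers"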
# I included his next point here, which is that 5 samples (i.e. seeing only 5 days of demand) can produce wildly different sample means and standard deviations. Very nice plot.

# In[11]:

simulations = 100
true_mean, true_std = 40, 20
mean, std = [], []
for _ in range(simulations):
    values = stats.norm(true_mean, true_std).rvs(5)
    mean.append(values.mean())
    std.append(values.std())

# In[12]:

fig, ax = plt.subplots(2, 1, figsize=(18, 8))

ax[0].plot(range(simulations), mean)
ax[0].set_xticks([])
ax[0].axhline(40, 0, 1, c="black", linestyle="--")
ax[0].set_ylabel("Estimate of Mean")

ax[1].plot(range(simulations), std)
ax[1].axhline(20, 0, 1, c="black", linestyle="--")
ax[1].set_xlabel("Calculation iteration")
ax[1].set_ylabel("Estimate of Std")

ax[0].set_title("Instability in Sample Mean and Standard Deviation from just 5 samples");

# ## Enter Bayesian Modeling
#
# Bayesian modeling offers an advantage here because it handles this low-sample-size problem *natively*. He doesn't discuss how to choose informative priors, but in this case he goes ahead and specifies the generative model below:
#
# $$
# \begin{aligned}
# \sigma &\sim \text{HalfStudentT}(\sigma=10, \nu=20) & \text{Prior} \\
# \mu &\sim \mathcal{N}(\mu=\bar{x}, \sigma=20) & \text{Prior} \\
# \text{Demand} &\sim \text{TruncatedNormal}(\mu, \sigma) & \text{Likelihood}
# \end{aligned}
# $$

# In[13]:

demand_mean, demand_std = demand_seen.mean(), demand_seen.std()

# ### Specify the model in `pymc`
#
# Below I make some slight modifications to follow the `pymc` docs. The workflow starts with specifying the generative model:

# In[14]:

newsvendor = pm.Model()

with newsvendor:
    # These are our priors, set through "experience". We'll get back to this
    sd = pm.HalfStudentT("standard_deviation_of_newspaper_demand", sigma=10, nu=20)
    mu = pm.Normal("mean_of_newspaper_demand", demand_mean, 20)

    demand = pm.TruncatedNormal("demand", mu=mu, sigma=sd, lower=0, observed=demand_seen)

# So the model is instantiated first, and everything else follows inside the `with` block. The priors and the likelihood are specified as a series of distributions. Each distribution is given a name, and the last one, `demand`, gets an additional `observed` argument where you pass in the observed data.
#
# ### Sample to get posterior distributions
#
# Next, we use `pm.sample`:

# In[15]:

with newsvendor:
    idata = pm.sample(tune=5000, draws=10000, chains=2)

# Here, `idata` is an `arviz.InferenceData` object: a container of labeled, dataframe-like groups (built on `xarray`) that stores the results of our sampling process. Note that `pymc` automatically assigned the NUTS sampler to this problem, which is a nice 'batteries included' feature.
#
# We can inspect this new object below:

# In[16]:

idata

# And we can use `arviz` to call some helpful functions on this object.
#
# Here we plot the posterior distribution of our parameters:

# In[17]:

az.plot_trace(idata, combined=True, figsize=(12, 8));

# If we were doing more formal inference, `az.summary` outputs high-level summary statistics:

# In[18]:

az.summary(idata, round_to=2)
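# Since several cells below flatten the posterior draws by hand, here is a quick sketch (my addition, not from the talk) of how to pull raw samples out of `idata`. Each group is an `xarray.Dataset` with `chain` and `draw` dimensions, and stacking those gives one flat sample dimension:

# In[ ]:

# Sketch (not from the talk): extracting raw posterior draws as flat arrays
posterior = idata.posterior
mu_draws = posterior["mean_of_newspaper_demand"].stack(sample=("chain", "draw"))
sd_draws = posterior["standard_deviation_of_newspaper_demand"].stack(sample=("chain", "draw"))
f"{mu_draws.sizes['sample']} draws, posterior mean of mu ~ {float(mu_draws.mean()):.1f}"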
# ### Sample posterior predictive
#
# Next we draw a posterior predictive sample with `pm.sample_posterior_predictive()`, passing the `extend_inferencedata` flag to add the posterior predictive draws as a new group within our original `arviz.InferenceData` object.

# In[19]:

with newsvendor:
    pm.sample_posterior_predictive(idata, extend_inferencedata=True)

# In[20]:

idata.posterior_predictive

# `az.plot_posterior` pretty-plots the posterior in the style of Kruschke:

# In[21]:

az.plot_posterior(idata)

# Ravin's next point here is that we're using Bayesian modeling to get all possible values of demand we could see (given the 5 days of demand observed so far) and to assign a probability to each demand scenario.
#
# He illustrates this below with a sample of 3 possible demand distributions:

# In[22]:

# Draw random parameter samples from the posterior and plot the demand distribution
# each one implies. Seed values 0, 10, 23 pick out three examples.
fig, ax = plt.subplots(figsize=(10, 6))

for seed in [0, 10, 23]:
    np.random.seed(seed)
    mean = np.random.choice(idata.posterior["mean_of_newspaper_demand"].values.flatten())
    std = np.random.choice(idata.posterior["standard_deviation_of_newspaper_demand"].values.flatten())
    x = np.linspace(20, 120, 100)
    y = stats.norm(mean, std).pdf(x)
    ax.plot(x, y)

ax.set_yticks([])
ax.set_title("Estimated DISTRIBUTIONS of Newspaper Demand\nfrom sampled parameter estimates")
ax.set_xlabel("Possible demand values of newspapers");

# Now we can plot the posterior predictive distribution of possible daily demand:

# In[23]:

fig, ax = plt.subplots(figsize=(14, 6))
bayesian_demand_estimates = idata.posterior_predictive["demand"].values.flatten()
az.plot_dist(bayesian_demand_estimates, kind="hist", ax=ax)
ax.set_title("Posterior Predictive Distribution of possible Daily Demands")
ax.set_yticks([]);

# ## Optimize over all possible demand scenarios
#
# Now we can optimize over ALL of the possible demand scenarios:

# In[24]:

bayesian_demand_estimates = idata.posterior_predictive["demand"].values.flatten()
opt_stoch = optimize.minimize_scalar(objective, bounds=(0, np.inf), args=(bayesian_demand_estimates,))

# In[25]:

bayes_opt = opt_stoch.x
f"Optimal inventory from Bayesian demand estimation and optimizer is {np.round(bayes_opt)} newspapers"

# After calculating the optimal inventory strategy, we can test it on the 10 days of unseen demand and calculate the profit:

# In[26]:

objective_func_value = objective(opt_stoch.x, demand_unseen)
f"Testing the inventory choice over the unseen days yields an objective value of {objective_func_value} (negative total profit), so lower is better"
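#
# As a closing sanity check (my addition, not one of the original cells), we can compare the Bayesian decision against the naive plug-in critical fractile decision from the earlier sketch on the same unseen days:

# In[ ]:

# Sketch (not from the talk): naive plug-in decision vs. Bayesian decision on unseen demand
naive_inventory = np.round(stats.norm(demand_seen.mean(), demand_seen.std()).ppf(2 / 7))
bayes_inventory = np.round(bayes_opt)

for label, inv in [("naive plug-in", naive_inventory), ("bayesian", bayes_inventory)]:
    total = sum(daily_profit(inv, d) for d in demand_unseen)
    print(f"{label}: inventory {inv:.0f} -> total profit ${total:.0f} over the unseen days")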