#!/usr/bin/env python
# coding: utf-8

# # Direct Outcome Prediction Model
# Also known as standardization.

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor

from causallib.datasets import load_smoking_weight
from causallib.estimation import Standardization, StratifiedStandardization
from causallib.evaluation import OutcomeEvaluator

# #### Data:
# The effect of quitting smoking on weight loss.
# The data example is taken from the
# [Hernan and Robins Causal Inference Book](https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/)

# In[2]:

data = load_smoking_weight()
data.X.join(data.a).join(data.y).head()

# ## "Standard" Standardization
# A single model is trained with the treatment assignment as an additional feature.
# During inference, the model assigns a treatment value to all samples,
# thus predicting the potential outcome of every sample.

# In[3]:

std = Standardization(LinearRegression())
std.fit(data.X, data.a, data.y)

# ##### Outcome Prediction
# The fitted model can predict individual outcomes:
# the potential outcome under each intervention.

# In[4]:

ind_outcomes = std.estimate_individual_outcome(data.X, data.a)
ind_outcomes.head()

# Population outcomes are obtained by aggregating the individual predictions
# (e.g., mean or median), via `agg_func`, which defaults to `'mean'`.

# In[5]:

median_pop_outcomes = std.estimate_population_outcome(data.X, data.a, agg_func="median")
median_pop_outcomes = median_pop_outcomes.rename("median")
mean_pop_outcomes = std.estimate_population_outcome(data.X, data.a, agg_func="mean")
mean_pop_outcomes = mean_pop_outcomes.rename("mean")
pop_outcomes = mean_pop_outcomes.to_frame().join(median_pop_outcomes)
pop_outcomes

# ##### Effect Estimation
# Similarly, effect estimation can be done on either the individual or the
# population level, depending on the outcomes provided.
# Population level effect using population outcomes:

# In[6]:

std.estimate_effect(mean_pop_outcomes[1], mean_pop_outcomes[0])

# Population level effect using individual outcomes, but asking for
# aggregation (the default behaviour):

# In[7]:

std.estimate_effect(ind_outcomes[1], ind_outcomes[0], agg="population")

# Individual level effect using individual outcomes:
# Since we're using a binary treatment with linear regression on a standard model,
# the difference is the same for all individuals, and is equal to the
# coefficient of the treatment variable.

# In[8]:

print(std.learner.coef_[0])
std.estimate_effect(ind_outcomes[1], ind_outcomes[0], agg="individual").head()

# Multiple types of effect are also supported:

# In[9]:

std.estimate_effect(ind_outcomes[1], ind_outcomes[0], agg="individual",
                    effect_types=["diff", "ratio"]).head()

# ### Treatment one-hot encoded
# For multi-treatment cases, where treatments are coded as 0, 1, 2, ... but
# have no ordinal interpretation, it is possible to make the model encode the
# treatment assignment vector as a one-hot matrix.

# In[10]:

std = Standardization(LinearRegression(), encode_treatment=True)
std.fit(data.X, data.a, data.y)
pop_outcomes = std.estimate_population_outcome(data.X, data.a, agg_func="mean")
# Fix: estimate the effect from the outcomes of the newly fitted one-hot
# model (`pop_outcomes`), not the stale `mean_pop_outcomes` that was computed
# for the previous (non-encoded) model and left `pop_outcomes` unused.
std.estimate_effect(pop_outcomes[1], pop_outcomes[0])

# ## Stratified Standardization
# While standardization can be viewed as a **"completely pooled"** estimator,
# as it includes both treatment groups together,
# Stratified Standardization can be viewed as a **"completely unpooled"** one,
# as it completely stratifies the dataset by treatment values and learns a
# different model for each treatment group.

# In[11]:

std = StratifiedStandardization(LinearRegression())
std.fit(data.X, data.a, data.y)

# Checking the core `learner` we can see that it actually holds two models,
# indexed by the treatment value:

# In[12]:

std.learner

# We can apply the same analysis as above.
# In[13]:

pop_outcomes = std.estimate_population_outcome(data.X, data.a, agg_func="mean")
# Fix: use the stratified model's own outcomes (`pop_outcomes`), not the
# stale `mean_pop_outcomes` computed earlier for the pooled linear model.
std.estimate_effect(pop_outcomes[1], pop_outcomes[0])

# We can see that internally, when asked for some potential outcome,
# the estimator simply applies the model trained on the group of that treatment:

# In[14]:

potential_outcome = std.estimate_individual_outcome(data.X, data.a)[1]
direct_prediction = std.learner[1].predict(data.X)
(potential_outcome == direct_prediction).all()

# #### Providing a complex scheme of learners
# When supplying a single learner to the standardization above,
# the model simply duplicates it for each treatment value.
# However, it is possible to specify a different model for each treatment
# value explicitly.
# For example, in cases where the treated are more complex than the untreated
# (because, say, of the background of those who chose to be treated),
# the treated can be given a more expressive model:

# In[15]:

learner = {0: LinearRegression(),
           1: GradientBoostingRegressor()}
std = StratifiedStandardization(learner)
std.fit(data.X, data.a, data.y)
std.learner

# In[16]:

ind_outcomes = std.estimate_individual_outcome(data.X, data.a)
ind_outcomes.head()

# In[17]:

std.estimate_effect(ind_outcomes[1], ind_outcomes[0])

# ## Evaluation

# #### Simple evaluation

# In[18]:

plots = ["common_support", "continuous_accuracy"]
evaluator = OutcomeEvaluator(std)
# Drop MSLE: the outcome contains negative values, and MSLE log-transforms
# its inputs, which is undefined for negatives.
# NOTE(review): this pokes a private attribute (`_regression_metrics`);
# prefer a public API for metric selection if causallib exposes one.
evaluator._regression_metrics.pop("msle")
results = evaluator.evaluate_simple(data.X, data.a, data.y, plots=plots)

# Results are shown for each treatment group separately, and also combined:

# In[19]:

results.scores

# #### Thorough evaluation

# In[20]:

plots = ["common_support", "continuous_accuracy", "residuals"]
evaluator = OutcomeEvaluator(Standardization(LinearRegression()))
results = evaluator.evaluate_cv(data.X, data.a, data.y, plots=plots)

# In[21]:

results.scores

# In[22]:

results.models

# In[ ]: