#!/usr/bin/env python
# coding: utf-8

# # Plan house improvements using causal analysis

# This notebook demonstrates how to use the Responsible AI Toolbox to make renovation decisions from historical apartment pricing data. It walks through the API calls needed to create a widget with causal inference insights, then guides a visual analysis of the data.

# ## Launch Responsible AI Toolbox

# The following section examines the code necessary to create the dataset. It then generates insights using the `responsibleai` API that can be visually analyzed.

# In[ ]:


import pandas as pd
from sklearn.model_selection import train_test_split
import zipfile


# First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the training data and the test data, each including the target column.

# In[ ]:


from raiutils.dataset import fetch_dataset
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

def split_label(dataset, target_feature):
    """Separate a DataFrame into its feature columns and its target column.

    Args:
        dataset: DataFrame containing both features and the target column.
        target_feature: Name of the target column.

    Returns:
        A ``(features, target)`` tuple. ``features`` is ``dataset`` without
        the target column; ``target`` is a single-column DataFrame (not a
        Series) holding only the target column.
    """
    feature_frame = dataset.drop(columns=[target_feature])
    # Double brackets keep the result two-dimensional (a DataFrame).
    target_frame = dataset[[target_feature]]
    return feature_frame, target_frame

def clean_data(X, y, target_feature):
    """Fit a preprocessing pipeline on ``X`` and return the transformed data.

    Columns with dtype ``int64`` are median-imputed and standardized; columns
    with dtype ``object`` are imputed with the constant ``'?'`` and one-hot
    encoded into a dense array.

    NOTE(review): columns of any other dtype (e.g. ``float64``) are not listed
    in either branch, so with ColumnTransformer's default ``remainder='drop'``
    they are left out of the transformed output, although they still appear in
    the returned ``features`` list. Confirm this is intended.

    Args:
        X: DataFrame of input features.
        y: DataFrame containing the ``target_feature`` column.
        target_feature: Name of the target column in ``y``.

    Returns:
        A tuple ``(X_transformed, feat_pipe, features, classes)`` where
        ``X_transformed`` is the output of the fitted transformer,
        ``feat_pipe`` is the fitted ``ColumnTransformer``, ``features`` is the
        list of original column names of ``X`` and ``classes`` is the list of
        unique values found in ``y[target_feature]``.
    """
    features = X.columns.values.tolist()
    classes = y[target_feature].unique().tolist()
    pipe_cfg = {
        'num_cols': X.dtypes[X.dtypes == 'int64'].index.values.tolist(),
        'cat_cols': X.dtypes[X.dtypes == 'object'].index.values.tolist(),
    }
    num_pipe = Pipeline([
        ('num_imputer', SimpleImputer(strategy='median')),
        ('num_scaler', StandardScaler())
    ])
    # scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
    # `sparse_output` and 1.4 removed the old name. Try the new keyword first
    # and fall back to the old one so the cell runs on both old and new
    # releases.
    try:
        cat_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        cat_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
    cat_pipe = Pipeline([
        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),
        ('cat_encoder', cat_encoder)
    ])
    feat_pipe = ColumnTransformer([
        ('num_pipe', num_pipe, pipe_cfg['num_cols']),
        ('cat_pipe', cat_pipe, pipe_cfg['cat_cols'])
    ])
    X = feat_pipe.fit_transform(X)
    # Show which columns were treated as categorical.
    print(pipe_cfg['cat_cols'])
    return X, feat_pipe, features, classes

# Name of the outcome column, and the (empty) list of categorical feature
# names that is later handed to RAIInsights.
target_feature = 'SalePriceK'
categorical_features = []

# Download the zipped sample data and unpack it into the working directory.
outdirname = 'responsibleai.12.28.21'
zipfilename = outdirname + '.zip'
fetch_dataset(
    'https://publictestdatasets.blob.core.windows.net/data/' + zipfilename,
    zipfilename,
)
with zipfile.ZipFile(zipfilename, 'r') as unzip:
    unzip.extractall('.')

# Load the apartments table and drop two columns so that `target_feature`
# is the only outcome-like column left (presumably these are alternative
# encodings of the sale price -- confirm against the dataset description).
all_data = pd.read_csv('apartments-train.csv')
all_data = all_data.drop(columns=['Sold_HigherThan_Median', 'SalePrice'])

# Separate features from the target, then split 50/50 with a fixed seed.
X, y = split_label(all_data, target_feature)
X_train_original, X_test_original, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=7
)

# Fit the preprocessing pipeline on the training half only, then reuse the
# fitted transformer on the test half.
X_train, feat_pipe, features, classes = clean_data(
    X_train_original, y_train, target_feature
)
X_test = feat_pipe.transform(X_test_original)

# Flatten the single-column target frames into 1-D arrays.
y_train = y_train[target_feature].to_numpy()
y_test = y_test[target_feature].to_numpy()

# Re-attach the target to copies of the untransformed feature frames; these
# raw frames (not the transformed arrays) are what RAIInsights receives.
train_data = X_train_original.assign(**{target_feature: y_train})
test_data = X_test_original.assign(**{target_feature: y_test})


# ### Create Data Insights

# In[ ]:


from raiwidgets import ResponsibleAIDashboard
from responsibleai import RAIInsights


# To use Responsible AI Dashboard, initialize a RAIInsights object upon which different components can be loaded.
# 
# RAIInsights accepts the model, the training dataset, the test dataset, the target feature string, the task type string, and a list of strings of categorical feature names as its arguments.
# 
# Here you can pass None as the model object as we only require historic data to make causal decisions.

# In[ ]:


# Build the insights container from historical data only (no model).
# NOTE(review): `classes` is supplied although the task type is 'regression';
# this looks like a leftover from a classification variant -- confirm whether
# it is needed.
rai_insights = RAIInsights(
    None,
    train_data,
    test_data,
    target_feature,
    'regression',
    categorical_features=categorical_features,
    classes=['Less than median', 'More than median'],
)


# Add the components of the toolbox that are focused on decision-making.

# In[ ]:


# Queue a causal analysis on rai_insights; each listed column is passed as a
# treatment feature.
treatment_features = [
    'OverallCond',
    'OverallQual',
    'Fireplaces',
    'GarageCars',
    'ScreenPorch',
]
rai_insights.causal.add(treatment_features=treatment_features)


# Once all the desired components have been loaded, compute insights on the test set.

# In[ ]:


# Compute insights: run the components queued on rai_insights (here, the
# causal analysis added earlier in this notebook).
rai_insights.compute()


# Finally, visualize and explore the model insights. Use the resulting widget or follow the link to view this in a new tab.

# In[ ]:


# Create the dashboard widget from the computed insights. As the last
# expression of a notebook cell, the resulting object is what gets displayed.
ResponsibleAIDashboard(rai_insights)


# See this [developer blog](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/responsible-ai-dashboard-a-one-stop-shop-for-operationalizing/ba-p/3030944) (Decision Making Flow section) to learn more about this use case and how to use the dashboard to debug your housing price prediction model.