#!/usr/bin/env python
# coding: utf-8

# # Assess predictions on multiclass wine data with a DNN model

# This notebook demonstrates the use of the `responsibleai` API to assess a PyTorch DNN model trained on the multiclass wine dataset. It walks through the API calls necessary to create a widget with model analysis insights, then guides a visual analysis of the model.

# * [Launch Responsible AI Toolbox](#Launch-Responsible-AI-Toolbox)
#     * [Train a DNN Model](#Train-a-DNN-Model)
#     * [Create Model and Data Insights](#Create-Model-and-Data-Insights)
# * [Assess Your Model](#Assess-Your-Model)
#     * [Aggregate Analysis](#Aggregate-Analysis)
#     * [Individual Analysis](#Individual-Analysis)

# ## Launch Responsible AI Toolbox

# The following section examines the code necessary to create datasets and a model. It then generates insights using the `responsibleai` API that can be visually analyzed.

# ### Train a DNN Model
# *The following section can be skipped. It loads a dataset and trains a model for illustrative purposes.*

# In[ ]:


import sklearn
import zipfile

import torch
import torch.nn as nn
import torch.nn.functional as F
torch.manual_seed(0)

from sklearn.datasets import load_wine
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split

import pandas as pd


# #### Load the wine data

# In[ ]:


# Fetch the wine dataset and unpack the pieces used below: the feature
# matrix, the target labels, and the feature / class names.
wine = load_wine()
X, y = wine['data'], wine['target']
feature_names = wine['feature_names']
classes = wine['target_names']


# In[ ]:


# Split data into train and test: half of the samples go to each side and
# the fixed random_state keeps the split reproducible between runs.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)


# #### Define a simple pytorch classification model.

# In[ ]:


def pytorch_net(numCols, numClasses=3):
    """Build a small feed-forward classifier that outputs class probabilities.

    The network is LayerNorm -> Linear(numCols, 100) -> ReLU -> Dropout(0.2)
    -> Linear(100, numClasses) -> Softmax.

    Args:
        numCols: Number of input features (size of the last input dimension).
        numClasses: Number of output classes. Defaults to 3.

    Returns:
        A newly constructed ``nn.Module``. Its forward pass returns a tensor
        whose last dimension has size ``numClasses`` and sums to 1.
    """
    class Net(nn.Module):
        def __init__(self):
            super().__init__()
            self.norm = nn.LayerNorm(numCols)
            self.fc1 = nn.Linear(numCols, 100)
            # Despite the ``fc`` prefix this is a dropout layer; the attribute
            # name is kept so the module's parameter/attribute layout is stable.
            self.fc2 = nn.Dropout(p=0.2)
            self.fc3 = nn.Linear(100, numClasses)
            # Normalise over the class (last) dimension explicitly. Without
            # ``dim`` PyTorch emits a deprecation warning and guesses the axis
            # from the input rank, which is wrong for e.g. 3-D inputs.
            self.output = nn.Softmax(dim=-1)

        def forward(self, X):
            X = self.norm(X)
            X = F.relu(self.fc1(X))
            X = self.fc2(X)
            X = self.fc3(X)
            return self.output(X)
    return Net()

# Convert the training arrays to tensors: float32 features and int64 labels
torch_X = torch.tensor(X_train, dtype=torch.float32)
torch_y = torch.tensor(y_train, dtype=torch.long)

# Create network structure, sized to the number of feature columns
net = pytorch_net(X_train.shape[1])


# #### Train the pytorch DNN classifier on the training data.

# In[ ]:


# Train the model with full-batch gradient descent
epochs = 10000
# The network's forward pass already ends in a softmax, so it returns
# probabilities rather than logits. nn.CrossEntropyLoss applies log-softmax
# internally and would normalise a second time, so use the negative
# log-likelihood of the log-probabilities instead.
criterion = nn.NLLLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for epoch in range(epochs):
    optimizer.zero_grad()
    out = net(torch_X)
    # Clamp before taking the log so an exactly-zero probability cannot
    # produce -inf and poison the gradients with NaNs.
    loss = criterion(torch.log(out.clamp_min(1e-12)), torch_y)
    loss.backward()
    optimizer.step()
    print('epoch: ', epoch, ' loss: ', loss.item())


# Wrap the model with scikit-learn style `predict`/`predict_proba` functions using the `wrap_model` function from [ml-wrappers](https://github.com/microsoft/ml-wrappers). This makes it compatible with `RAIInsights` and the `ResponsibleAIDashboard`.

# In[ ]:


from ml_wrappers import DatasetWrapper, wrap_model

# Wrap the trained network as a classification model, passing the training
# features (wrapped in a DatasetWrapper) alongside it.
model = wrap_model(
    net,
    DatasetWrapper(X_train),
    model_task='classification',
)


# ### Create Model and Data Insights

# In[ ]:


from raiwidgets import ResponsibleAIDashboard
from responsibleai import RAIInsights


# To use Responsible AI Toolbox, initialize a RAIInsights object upon which different components can be loaded.
# 
# RAIInsights accepts the model, the training dataset, the test dataset, the target feature string, the task type string, and, optionally, a list of strings of categorical feature names as its arguments.

# In[ ]:


target_feature = 'wine'

# Rebuild each split as a DataFrame with named feature columns, then append
# the labels as the target column.
X_train = pd.DataFrame(X_train, columns=feature_names)
X_train[target_feature] = y_train

X_test = pd.DataFrame(X_test, columns=feature_names)
X_test[target_feature] = y_test

rai_insights = RAIInsights(
    model,
    X_train,
    X_test,
    target_feature,
    'classification',
)


# Add the components of the toolbox that are focused on model assessment.

# In[ ]:


# Interpretability: add the explainer component to rai_insights
rai_insights.explainer.add()
# Error Analysis: add the error analysis component to rai_insights
rai_insights.error_analysis.add()


# Once all the desired components have been loaded, compute insights on the test set.

# In[ ]:


# Run the computation for the components added to rai_insights
rai_insights.compute()


# Finally, visualize and explore the model insights. Use the resulting widget or follow the link to view this in a new tab.

# In[ ]:


# Create the dashboard widget from the rai_insights object
ResponsibleAIDashboard(rai_insights)