#!/usr/bin/env python
# coding: utf-8

# # Plan house improvements using causal analysis

# This notebook demonstrates the use of the Responsible AI Toolbox to make renovation decisions from historic apartments pricing data. It walks through the API calls necessary to create a widget with causal inferencing insights, then guides a visual analysis of the data.

# ## Launch Responsible AI Toolbox

# The following section examines the code necessary to create the dataset. It then generates insights using the `responsibleai` API that can be visually analyzed.

# In[ ]:


import pandas as pd
from sklearn.model_selection import train_test_split
import zipfile


# First, load the apartment dataset and specify the different types of features. Then, clean it and put it into a DataFrame with named columns. After loading and cleaning the data, split the datapoints into training and test sets. Assemble separate datasets for the full sample and the test data.

# In[ ]:


from raiutils.dataset import fetch_dataset
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer


def split_label(dataset, target_feature):
    """Separate the target column from the remaining columns.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table that contains ``target_feature`` as one of its columns.
    target_feature : str
        Name of the label column.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``dataset`` without the target column and
        ``y`` is a single-column DataFrame holding only the target column.
    """
    X = dataset.drop([target_feature], axis=1)
    y = dataset[[target_feature]]
    return X, y


def _make_dense_one_hot_encoder():
    """Build a OneHotEncoder that ignores unknown categories and returns a dense array.

    scikit-learn 1.2 renamed the ``sparse`` argument to ``sparse_output`` and
    1.4 removed the old spelling, so the new name is tried first and the old
    one is used only when the installed version rejects it.
    """
    try:
        return OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    except TypeError:
        return OneHotEncoder(handle_unknown='ignore', sparse=False)


def clean_data(X, y, target_feature):
    """Fit a preprocessing pipeline on ``X`` and return the transformed features.

    Columns with dtype ``int64`` are median-imputed and standard-scaled;
    columns with dtype ``object`` are imputed with the constant ``'?'`` and
    one-hot encoded. The list of categorical column names is printed.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table.
    y : pandas.DataFrame
        Table containing the ``target_feature`` column.
    target_feature : str
        Name of the label column in ``y``.

    Returns
    -------
    tuple
        ``(X_transformed, feat_pipe, features, classes)``: the output of
        ``feat_pipe.fit_transform(X)``, the fitted ``ColumnTransformer``, the
        original column names of ``X``, and the unique values of the target
        column.
    """
    features = X.columns.values.tolist()
    classes = y[target_feature].unique().tolist()

    # NOTE(review): only int64 columns are treated as numeric. Columns of any
    # other dtype (e.g. float64) appear in neither list, and ColumnTransformer
    # drops unlisted columns by default -- confirm this is intended.
    pipe_cfg = {
        'num_cols': X.dtypes[X.dtypes == 'int64'].index.values.tolist(),
        'cat_cols': X.dtypes[X.dtypes == 'object'].index.values.tolist(),
    }
    num_pipe = Pipeline([
        ('num_imputer', SimpleImputer(strategy='median')),
        ('num_scaler', StandardScaler())
    ])
    cat_pipe = Pipeline([
        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),
        ('cat_encoder', _make_dense_one_hot_encoder())
    ])
    feat_pipe = ColumnTransformer([
        ('num_pipe', num_pipe, pipe_cfg['num_cols']),
        ('cat_pipe', cat_pipe, pipe_cfg['cat_cols'])
    ])
    X = feat_pipe.fit_transform(X)
    print(pipe_cfg['cat_cols'])

    return X, feat_pipe, features, classes


target_feature = 'SalePriceK'
categorical_features = []

# Download and unpack the archive that contains the apartments CSV.
outdirname = 'responsibleai.12.28.21'
zipfilename = outdirname + '.zip'

fetch_dataset('https://publictestdatasets.blob.core.windows.net/data/' + zipfilename, zipfilename)

with zipfile.ZipFile(zipfilename, 'r') as unzip:
    unzip.extractall('.')

all_data = pd.read_csv('apartments-train.csv')
# Remove the two other price-related columns so only SalePriceK remains as the label.
all_data = all_data.drop(['Sold_HigherThan_Median','SalePrice'], axis=1)
X, y = split_label(all_data, target_feature)

X_train_original, X_test_original, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=7)

# Fit the preprocessing pipeline on the training half only, then reuse it on the test half.
X_train, feat_pipe, features, classes = clean_data(X_train_original, y_train, target_feature)
y_train = y_train[target_feature].to_numpy()

X_test = feat_pipe.transform(X_test_original)
y_test = y_test[target_feature].to_numpy()

# Re-attach the label to the untransformed features; these frames are what RAIInsights receives.
train_data = X_train_original.copy()
train_data[target_feature] = y_train

test_data = X_test_original.copy()
test_data[target_feature] = y_test


# ### Create Data Insights

# In[ ]:


from raiwidgets import ResponsibleAIDashboard
from responsibleai import RAIInsights


# To use Responsible AI Dashboard, initialize a RAIInsights object upon which different components can be loaded.
#
# RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.

# You may also create the `FeatureMetadata` container, identify any feature of your choice as the `identity_feature`, specify a list of strings of categorical feature names via the `categorical_features` parameter, and specify dropped features via the `dropped_features` parameter. The `FeatureMetadata` may also be passed into the `RAIInsights`.
# In[ ]:


from responsibleai.feature_metadata import FeatureMetadata

# categorical_features is the (empty) list defined in the data-loading cell.
feature_metadata = FeatureMetadata(categorical_features=categorical_features, dropped_features=[])


# In[ ]:


# No model is supplied (first argument is None); only the causal component is added below.
# NOTE(review): `classes` is passed together with the 'regression' task type --
# confirm whether these labels are intended or are a leftover that should be removed.
rai_insights = RAIInsights(None, train_data, test_data, target_feature, 'regression',
                           feature_metadata=feature_metadata,
                           classes=['Less than median', 'More than median'])


# Add the components of the toolbox that are focused on decision-making.

# In[ ]:


# Queue Responsible AI insights with causal insights
rai_insights.causal.add(treatment_features=['OverallCond', 'OverallQual', 'Fireplaces', 'GarageCars', 'ScreenPorch'])


# Once all the desired components have been loaded, compute insights on the test set.

# In[ ]:


# Compute insights
rai_insights.compute()


# Finally, visualize and explore the model insights. Use the resulting widget or follow the link to view this in a new tab.

# In[ ]:


ResponsibleAIDashboard(rai_insights)


# See this [developer blog](https://techcommunity.microsoft.com/t5/ai-machine-learning-blog/responsible-ai-dashboard-a-one-stop-shop-for-operationalizing/ba-p/3030944) (Decision Making Flow section) to learn more about this use case and how to use the dashboard to debug your housing price prediction model.