Adversarial debiasing [1] is an in-processing technique that learns a classifier to maximize prediction accuracy while simultaneously reducing an adversary's ability to determine the protected attribute from the predictions. This yields a fair classifier because the predictions cannot carry group-discrimination information that the adversary could exploit. We will see how to use this algorithm to learn models with and without the fairness constraint and apply them to the Adult dataset.
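To make the idea concrete before turning to the AIF360 API, here is a minimal, self-contained sketch of the adversarial objective from [1]. It is not the AIF360 implementation: the toy data, variable names (W_clf, W_adv, alpha), and single-input adversary are assumptions chosen only to illustrate the update rule, in which the classifier descends on its own loss, removes the component of that gradient that helps the adversary (a projection), and ascends on the adversary's loss.
# Minimal illustrative sketch of adversarial debiasing (not the AIF360 code).
# Classifier predicts y from x; adversary predicts the protected attribute z
# from the classifier's output; the classifier uses the projected gradient of [1].
import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

rng = np.random.RandomState(0)
X_toy = rng.randn(256, 5).astype(np.float32)                # toy features
z_toy = (rng.rand(256, 1) > 0.5).astype(np.float32)         # toy protected attribute
y_toy = ((X_toy[:, :1] + z_toy) > 0.5).astype(np.float32)   # toy label, correlated with z

x_ph = tf.placeholder(tf.float32, [None, 5])
y_ph = tf.placeholder(tf.float32, [None, 1])
z_ph = tf.placeholder(tf.float32, [None, 1])

# Classifier: logistic regression for the label y.
W_clf = tf.Variable(tf.zeros([5, 1])); b_clf = tf.Variable(tf.zeros([1]))
logit_y = tf.matmul(x_ph, W_clf) + b_clf
clf_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=y_ph, logits=logit_y))

# Adversary: tries to recover z from the classifier's prediction.
W_adv = tf.Variable(tf.zeros([1, 1])); b_adv = tf.Variable(tf.zeros([1]))
logit_z = tf.matmul(tf.sigmoid(logit_y), W_adv) + b_adv
adv_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(labels=z_ph, logits=logit_z))

alpha = 1.0  # adversarial weight (assumed value)
clf_vars, adv_vars = [W_clf, b_clf], [W_adv, b_adv]

# Classifier update: grad(clf_loss) minus its projection onto grad(adv_loss),
# minus alpha * grad(adv_loss), so reducing accuracy loss never helps the adversary.
adv_grads = dict(zip(clf_vars, tf.gradients(adv_loss, clf_vars)))
clf_grads_and_vars = []
for g, v in zip(tf.gradients(clf_loss, clf_vars), clf_vars):
    ag = adv_grads[v]
    unit = ag / (tf.norm(ag) + 1e-8)
    g = g - tf.reduce_sum(g * unit) * unit - alpha * ag
    clf_grads_and_vars.append((g, v))
train_clf = tf.train.AdamOptimizer(0.01).apply_gradients(clf_grads_and_vars)
train_adv = tf.train.AdamOptimizer(0.01).minimize(adv_loss, var_list=adv_vars)

with tf.Session() as toy_sess:
    toy_sess.run(tf.global_variables_initializer())
    for _ in range(200):
        toy_sess.run([train_adv, train_clf],
                     feed_dict={x_ph: X_toy, y_ph: y_toy, z_ph: z_toy})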
%matplotlib inline
# Load all necessary packages
import sys
sys.path.append("../")
from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult, load_preproc_data_compas, load_preproc_data_german
from aif360.algorithms.inprocessing.adversarial_debiasing import AdversarialDebiasing
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, MaxAbsScaler
from sklearn.metrics import accuracy_score
from IPython.display import Markdown, display
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
# Get the dataset and split into train and test
dataset_orig = load_preproc_data_adult()
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)
# print out some labels, names, etc.
display(Markdown("#### Training Dataset shape"))
print(dataset_orig_train.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(dataset_orig_train.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(dataset_orig_train.privileged_protected_attributes,
dataset_orig_train.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(dataset_orig_train.feature_names)
(34189, 18)
(1.0, 0.0)
['sex', 'race']
([array([1.]), array([1.])], [array([0.]), array([0.])])
['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']
# Metric for the original dataset
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
display(Markdown("#### Original training dataset"))
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())
metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_test.mean_difference())
Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.192750
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.198626
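The reported number is the mean outcome (statistical parity) difference, P(Y = favorable | unprivileged) - P(Y = favorable | privileged); a negative value means the unprivileged group receives the favorable label less often. As a hedged sanity check (not part of the original notebook), it can be recomputed directly from the dataset arrays:
# Recompute the train-set mean difference by hand (sanity check only).
import numpy as np
sex_col = dataset_orig_train.protected_attribute_names.index('sex')
sex = dataset_orig_train.protected_attributes[:, sex_col]
fav = (dataset_orig_train.labels.ravel() == dataset_orig_train.favorable_label)
print(fav[sex == 0].mean() - fav[sex == 1].mean())  # should match the value printed above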
# Scale each feature by its maximum absolute value; labels and protected attributes are untouched
scaler = MaxAbsScaler()
dataset_orig_train.features = scaler.fit_transform(dataset_orig_train.features)
dataset_orig_test.features = scaler.transform(dataset_orig_test.features)
metric_scaled_train = BinaryLabelDatasetMetric(dataset_orig_train,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
display(Markdown("#### Scaled dataset - Verify that the scaling does not affect the group label statistics"))
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_scaled_train.mean_difference())
metric_scaled_test = BinaryLabelDatasetMetric(dataset_orig_test,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_scaled_test.mean_difference())
Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.192750
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.198626
# Set up the in-processing adversarial debiasing algorithm
# Learn parameters with debias set to False
sess = tf.Session()
plain_model = AdversarialDebiasing(privileged_groups = privileged_groups,
unprivileged_groups = unprivileged_groups,
scope_name='plain_classifier',
debias=False,
sess=sess)
plain_model.fit(dataset_orig_train)
epoch 0; iter: 0; batch classifier loss: 0.707587
epoch 0; iter: 200; batch classifier loss: 0.396519
[... batch losses for epochs 1-48 omitted ...]
epoch 49; iter: 0; batch classifier loss: 0.356089
epoch 49; iter: 200; batch classifier loss: 0.467275
<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x106cc2f10>
# Apply the plain model to the training and test data
dataset_nodebiasing_train = plain_model.predict(dataset_orig_train)
dataset_nodebiasing_test = plain_model.predict(dataset_orig_test)
# Metrics for the dataset from plain model (without debiasing)
display(Markdown("#### Plain model - without debiasing - dataset metrics"))
metric_dataset_nodebiasing_train = BinaryLabelDatasetMetric(dataset_nodebiasing_train,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_train.mean_difference())
metric_dataset_nodebiasing_test = BinaryLabelDatasetMetric(dataset_nodebiasing_test,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_test.mean_difference())
display(Markdown("#### Plain model - without debiasing - classification metrics"))
classified_metric_nodebiasing_test = ClassificationMetric(dataset_orig_test,
dataset_nodebiasing_test,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
print("Test set: Classification accuracy = %f" % classified_metric_nodebiasing_test.accuracy())
TPR = classified_metric_nodebiasing_test.true_positive_rate()
TNR = classified_metric_nodebiasing_test.true_negative_rate()
bal_acc_nodebiasing_test = 0.5*(TPR+TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_nodebiasing_test)
print("Test set: Disparate impact = %f" % classified_metric_nodebiasing_test.disparate_impact())
print("Test set: Equal opportunity difference = %f" % classified_metric_nodebiasing_test.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % classified_metric_nodebiasing_test.average_odds_difference())
print("Test set: Theil_index = %f" % classified_metric_nodebiasing_test.theil_index())
Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.217876
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.221187
Test set: Classification accuracy = 0.804955
Test set: Balanced classification accuracy = 0.666400
Test set: Disparate impact = 0.000000
Test set: Equal opportunity difference = -0.470687
Test set: Average odds difference = -0.291055
Test set: Theil_index = 0.175113
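Disparate impact is the ratio P(pred = favorable | unprivileged) / P(pred = favorable | privileged); a value of 0 means the plain model predicts the favorable outcome for none of the unprivileged (sex = 0) test instances. A hedged recomputation from the predicted dataset (not part of the original notebook):
# Recompute the plain model's test-set disparate impact by hand (sanity check only).
sex_col = dataset_nodebiasing_test.protected_attribute_names.index('sex')
sex = dataset_nodebiasing_test.protected_attributes[:, sex_col]
pred_fav = (dataset_nodebiasing_test.labels.ravel() == dataset_nodebiasing_test.favorable_label)
print(pred_fav[sex == 0].mean() / pred_fav[sex == 1].mean())  # should match the value printed above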
sess.close()
tf.reset_default_graph()
sess = tf.Session()
# Learn parameters with debias set to True
debiased_model = AdversarialDebiasing(privileged_groups = privileged_groups,
unprivileged_groups = unprivileged_groups,
scope_name='debiased_classifier',
debias=True,
sess=sess)
debiased_model.fit(dataset_orig_train)
epoch 0; iter: 0; batch classifier loss: 0.721611; batch adversarial loss: 0.630777
epoch 0; iter: 200; batch classifier loss: 0.442980; batch adversarial loss: 0.656542
[... batch losses for epochs 1-48 omitted ...]
epoch 49; iter: 0; batch classifier loss: 0.506737; batch adversarial loss: 0.624289
epoch 49; iter: 200; batch classifier loss: 0.359482; batch adversarial loss: 0.618086
<aif360.algorithms.inprocessing.adversarial_debiasing.AdversarialDebiasing at 0x1c32efcf10>
# Apply the debiased model to the training and test data
dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
dataset_debiasing_test = debiased_model.predict(dataset_orig_test)
# Metrics for the plain model (without debiasing), repeated here for comparison
display(Markdown("#### Plain model - without debiasing - dataset metrics"))
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_train.mean_difference())
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_nodebiasing_test.mean_difference())
# Metrics for the dataset from model with debiasing
display(Markdown("#### Model - with debiasing - dataset metrics"))
metric_dataset_debiasing_train = BinaryLabelDatasetMetric(dataset_debiasing_train,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
print("Train set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_debiasing_train.mean_difference())
metric_dataset_debiasing_test = BinaryLabelDatasetMetric(dataset_debiasing_test,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
print("Test set: Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_dataset_debiasing_test.mean_difference())
display(Markdown("#### Plain model - without debiasing - classification metrics"))
print("Test set: Classification accuracy = %f" % classified_metric_nodebiasing_test.accuracy())
TPR = classified_metric_nodebiasing_test.true_positive_rate()
TNR = classified_metric_nodebiasing_test.true_negative_rate()
bal_acc_nodebiasing_test = 0.5*(TPR+TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_nodebiasing_test)
print("Test set: Disparate impact = %f" % classified_metric_nodebiasing_test.disparate_impact())
print("Test set: Equal opportunity difference = %f" % classified_metric_nodebiasing_test.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % classified_metric_nodebiasing_test.average_odds_difference())
print("Test set: Theil_index = %f" % classified_metric_nodebiasing_test.theil_index())
display(Markdown("#### Model - with debiasing - classification metrics"))
classified_metric_debiasing_test = ClassificationMetric(dataset_orig_test,
dataset_debiasing_test,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
print("Test set: Classification accuracy = %f" % classified_metric_debiasing_test.accuracy())
TPR = classified_metric_debiasing_test.true_positive_rate()
TNR = classified_metric_debiasing_test.true_negative_rate()
bal_acc_debiasing_test = 0.5*(TPR+TNR)
print("Test set: Balanced classification accuracy = %f" % bal_acc_debiasing_test)
print("Test set: Disparate impact = %f" % classified_metric_debiasing_test.disparate_impact())
print("Test set: Equal opportunity difference = %f" % classified_metric_debiasing_test.equal_opportunity_difference())
print("Test set: Average odds difference = %f" % classified_metric_debiasing_test.average_odds_difference())
print("Test set: Theil_index = %f" % classified_metric_debiasing_test.theil_index())
Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.217876
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.221187
Train set: Difference in mean outcomes between unprivileged and privileged groups = -0.090157
Test set: Difference in mean outcomes between unprivileged and privileged groups = -0.094732
Test set: Classification accuracy = 0.804955
Test set: Balanced classification accuracy = 0.666400
Test set: Disparate impact = 0.000000
Test set: Equal opportunity difference = -0.470687
Test set: Average odds difference = -0.291055
Test set: Theil_index = 0.175113
Test set: Classification accuracy = 0.792056
Test set: Balanced classification accuracy = 0.672481
Test set: Disparate impact = 0.553746
Test set: Equal opportunity difference = -0.090716
Test set: Average odds difference = -0.053841
Test set: Theil_index = 0.170358
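As an optional final step, the two models can be compared visually. This is a hedged sketch (not part of the original notebook) that only reuses the metric objects already created above; matplotlib was imported at the top of the notebook.
# Optional: bar chart comparing fairness metrics of the plain vs. debiased model.
metric_names = ['Mean outcome diff.', 'Equal opp. diff.', 'Avg. odds diff.']
plain_vals = [metric_dataset_nodebiasing_test.mean_difference(),
              classified_metric_nodebiasing_test.equal_opportunity_difference(),
              classified_metric_nodebiasing_test.average_odds_difference()]
debiased_vals = [metric_dataset_debiasing_test.mean_difference(),
                 classified_metric_debiasing_test.equal_opportunity_difference(),
                 classified_metric_debiasing_test.average_odds_difference()]
x = range(len(metric_names))
plt.bar([i - 0.2 for i in x], plain_vals, width=0.4, label='Plain')
plt.bar([i + 0.2 for i in x], debiased_vals, width=0.4, label='Debiased')
plt.xticks(list(x), metric_names)
plt.axhline(0.0, color='k', linewidth=0.8)  # 0 indicates no group disparity
plt.ylabel('Metric value')
plt.legend()
plt.show()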
References:
[1] B. H. Zhang, B. Lemoine, and M. Mitchell, "Mitigating Unwanted Biases with Adversarial Learning," AAAI/ACM Conference on AI, Ethics, and Society, 2018.