Learning fair representations [1] is a pre-processing technique that finds a latent representation which encodes the data well while obfuscating information about protected attributes. We will see how to use this algorithm to learn representations that encourage individual fairness, and apply it to the Adult dataset.
References:
[1] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork, "Learning Fair Representations." International Conference on Machine Learning, 2013.
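At a high level, LFR maps each input to a distribution over $k$ learned prototypes and minimizes a weighted sum of three competing terms (a sketch of the objective from [1]; see the paper for the exact parameterization):

$$L = A_x \, L_x + A_y \, L_y + A_z \, L_z$$

Here $L_x$ penalizes poor reconstruction of the input, $L_y$ penalizes poor prediction of the label, and $L_z$ penalizes differences in how the privileged and unprivileged groups are distributed over the prototypes. The weights $A_x$, $A_y$, and $A_z$ reappear below as the `Ax`, `Ay`, and `Az` arguments of the `LFR` constructor.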
%matplotlib inline
# Load all necessary packages
import sys
sys.path.append("../")
from aif360.datasets import BinaryLabelDataset
from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from aif360.metrics.utils import compute_boolean_conditioning_vector
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult
from aif360.algorithms.preprocessing.lfr import LFR
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from IPython.display import Markdown, display
import matplotlib.pyplot as plt
import numpy as np
from common_utils import compute_metrics
# Get the dataset and split into train and test
dataset_orig = load_preproc_data_adult()
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)
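The split above is shuffled, so results will vary slightly from run to run. A minimal sketch for reproducibility, assuming this AIF360 version's `split` draws its shuffle from NumPy's global random state (recent releases also accept a `seed` keyword):
np.random.seed(0)  # hypothetical reproducibility tweak, not part of the original recipe
dataset_orig_train, dataset_orig_test = dataset_orig.split([0.7], shuffle=True)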
# print out some labels, names, etc.
display(Markdown("#### Training Dataset shape"))
print(dataset_orig_train.features.shape)
display(Markdown("#### Favorable and unfavorable labels"))
print(dataset_orig_train.favorable_label, dataset_orig_train.unfavorable_label)
display(Markdown("#### Protected attribute names"))
print(dataset_orig_train.protected_attribute_names)
display(Markdown("#### Privileged and unprivileged protected attribute values"))
print(dataset_orig_train.privileged_protected_attributes,
      dataset_orig_train.unprivileged_protected_attributes)
display(Markdown("#### Dataset feature names"))
print(dataset_orig_train.feature_names)
(34189, 18)
1.0 0.0
['sex', 'race']
[array([1.]), array([1.])] [array([0.]), array([0.])]
['race', 'sex', 'Age (decade)=10', 'Age (decade)=20', 'Age (decade)=30', 'Age (decade)=40', 'Age (decade)=50', 'Age (decade)=60', 'Age (decade)=>=70', 'Education Years=6', 'Education Years=7', 'Education Years=8', 'Education Years=9', 'Education Years=10', 'Education Years=11', 'Education Years=12', 'Education Years=<6', 'Education Years=>12']
# Metric for the original dataset
privileged_groups = [{'sex': 1.0}]
unprivileged_groups = [{'sex': 0.0}]
metric_orig_train = BinaryLabelDatasetMetric(dataset_orig_train,
                                             unprivileged_groups=unprivileged_groups,
                                             privileged_groups=privileged_groups)
display(Markdown("#### Original training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_train.mean_difference())
metric_orig_test = BinaryLabelDatasetMetric(dataset_orig_test,
                                            unprivileged_groups=unprivileged_groups,
                                            privileged_groups=privileged_groups)
display(Markdown("#### Original test dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_orig_test.mean_difference())
# Scale the features; fit the scaler on training data only to avoid test-set leakage
scale_orig = StandardScaler()
dataset_orig_train.features = scale_orig.fit_transform(dataset_orig_train.features)
dataset_orig_test.features = scale_orig.transform(dataset_orig_test.features)
# Input reconstruction quality - Ax
# Fairness constraint - Az
# Output prediction error - Ay
privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]
TR = LFR(unprivileged_groups=unprivileged_groups,
         privileged_groups=privileged_groups,
         k=10, Ax=0.1, Ay=1.0, Az=2.0,
         verbose=1)
TR = TR.fit(dataset_orig_train, maxiter=5000, maxfun=5000)
step: 0, loss: 1.0939550595829053, L_x: 2.531834521858599, L_y: 0.8200826015334493, L_z: 0.010344502931797964
step: 250, loss: 0.9162820270109503, L_x: 2.529109218043187, L_y: 0.6432961063010657, L_z: 0.010037499452782905
step: 500, loss: 0.8207071510514392, L_x: 2.5204911168067197, L_y: 0.5500397646035967, L_z: 0.00930913738358528
step: 750, loss: 0.8102771268166408, L_x: 2.511873834704061, L_y: 0.5427956868742799, L_z: 0.008147028235977415
step: 1000, loss: 0.7996570283329768, L_x: 2.480828451323288, L_y: 0.5399446552800813, L_z: 0.00581476396028337
step: 1250, loss: 0.7844631169970814, L_x: 2.4242508289183613, L_y: 0.5304307199052671, L_z: 0.005803657099989009
step: 1500, loss: 0.7653305722023572, L_x: 2.3297047767431986, L_y: 0.5176248867874912, L_z: 0.007367603870273078
step: 1750, loss: 0.7154304631442515, L_x: 2.085955877234543, L_y: 0.48081670080967953, L_z: 0.013009087305558827
step: 2000, loss: 0.6906420918886886, L_x: 1.896344106091722, L_y: 0.4646651544564373, L_z: 0.018171263411539594
step: 2250, loss: 0.6783680937630076, L_x: 1.7895665853948028, L_y: 0.4587714378849466, L_z: 0.020319998669290275
step: 2500, loss: 0.6725576747654705, L_x: 1.742061633693402, L_y: 0.4577729094336143, L_z: 0.020289300981257967
step: 2750, loss: 0.6694103860159343, L_x: 1.7548885984309939, L_y: 0.4545867175857845, L_z: 0.019667404293525217
step: 3000, loss: 0.6658207636894926, L_x: 1.7515234617350093, L_y: 0.4539151313299769, L_z: 0.018376643093007367
step: 3250, loss: 0.6481415219979564, L_x: 1.7252276686316934, L_y: 0.4491717858033674, L_z: 0.013223484665709846
step: 3500, loss: 0.645366243737316, L_x: 1.7196207136719521, L_y: 0.4482843307446003, L_z: 0.012559920812760247
step: 3750, loss: 0.6425278186287126, L_x: 1.7117758355776211, L_y: 0.4473063883366716, L_z: 0.012021923367139413
step: 4000, loss: 0.6419409673076768, L_x: 1.7092609385556714, L_y: 0.44744616781598634, L_z: 0.011784352818061686
step: 4250, loss: 0.6377801462539607, L_x: 1.6917081956472533, L_y: 0.4496335370425122, L_z: 0.009487894823361622
# Transform the training and test data with the fitted LFR model
dataset_transf_train = TR.transform(dataset_orig_train)
dataset_transf_test = TR.transform(dataset_orig_test)
print(classification_report(dataset_orig_test.labels, dataset_transf_test.labels))
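The report above treats the original test labels as ground truth and the LFR-relabeled outputs as predictions, so it measures how much the transformation changed the labels. For a single-number summary, the already-imported `accuracy_score` can be used (an illustrative addition, not part of the original recipe):
# Fraction of test instances whose label is unchanged by the LFR relabeling
print("Label agreement after LFR = %f" % accuracy_score(dataset_orig_test.labels.ravel(),
                                                        dataset_transf_test.labels.ravel()))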
metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train,
                                               unprivileged_groups=unprivileged_groups,
                                               privileged_groups=privileged_groups)
display(Markdown("#### Transformed training dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_transf_train.mean_difference())
metric_transf_test = BinaryLabelDatasetMetric(dataset_transf_test,
                                              unprivileged_groups=unprivileged_groups,
                                              privileged_groups=privileged_groups)
display(Markdown("#### Transformed test dataset"))
print("Difference in mean outcomes between unprivileged and privileged groups = %f" % metric_transf_test.mean_difference())
display(Markdown("#### Predictions from transformed testing data"))
bal_acc_arr_transf = []
disp_imp_arr_transf = []
class_thresh_arr = np.linspace(0.01, 0.99, 100)
# Threshold on a copy of the original test set, using the scores produced by LFR
dataset_transf_test_new = dataset_orig_test.copy(deepcopy=True)
dataset_transf_test_new.scores = dataset_transf_test.scores
for thresh in class_thresh_arr:
    fav_inds = dataset_transf_test_new.scores > thresh
    dataset_transf_test_new.labels[fav_inds] = 1.0
    dataset_transf_test_new.labels[~fav_inds] = 0.0
    metric_test_aft = compute_metrics(dataset_orig_test, dataset_transf_test_new,
                                      unprivileged_groups, privileged_groups,
                                      disp=False)
    bal_acc_arr_transf.append(metric_test_aft["Balanced accuracy"])
    disp_imp_arr_transf.append(metric_test_aft["Disparate impact"])
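A natural way to use this sweep (an illustrative addition) is to pick the threshold with the best balanced accuracy and check the fairness metric at that point:
# Threshold with the highest balanced accuracy, and disparate impact there
best_ind = np.argmax(bal_acc_arr_transf)
print("Best balanced accuracy = %f at threshold %f" % (bal_acc_arr_transf[best_ind],
                                                       class_thresh_arr[best_ind]))
print("Disparate impact at that threshold = %f" % disp_imp_arr_transf[best_ind])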
fig, ax1 = plt.subplots(figsize=(10,7))
ax1.plot(class_thresh_arr, bal_acc_arr_transf)
ax1.set_xlabel('Classification Thresholds', fontsize=16, fontweight='bold')
ax1.set_ylabel('Balanced Accuracy', color='b', fontsize=16, fontweight='bold')
ax1.xaxis.set_tick_params(labelsize=14)
ax1.yaxis.set_tick_params(labelsize=14)
ax2 = ax1.twinx()
ax2.plot(class_thresh_arr, np.abs(1.0-np.array(disp_imp_arr_transf)), color='r')
ax2.set_ylabel('abs(1-disparate impact)', color='r', fontsize=16, fontweight='bold')
ax2.yaxis.set_tick_params(labelsize=14)
ax2.grid(True)
For the classifier's predictions to be fair, abs(1 - disparate impact) must be small (close to 0), i.e. disparate impact itself must be close to 1.
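Disparate impact is the ratio of favorable-outcome rates between the groups,

$$\text{DI} = \frac{\Pr(\hat{Y} = 1 \mid D = \text{unprivileged})}{\Pr(\hat{Y} = 1 \mid D = \text{privileged})},$$

so DI = 1, i.e. abs(1 - DI) = 0, corresponds to demographic parity in the predictions.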
display(Markdown("#### Individual fairness metrics"))
print("Consistency of labels in transformed training dataset= %f" %metric_transf_train.consistency())
print("Consistency of labels in original training dataset= %f" %metric_orig_train.consistency())
print("Consistency of labels in transformed test dataset= %f" %metric_transf_test.consistency())
print("Consistency of labels in original test dataset= %f" %metric_orig_test.consistency())
def check_algorithm_success():
    """Verify that the transformed dataset's consistency exceeds the original's."""
    assert metric_transf_test.consistency() > metric_orig_test.consistency(), \
        "Transformed dataset consistency should be greater than that of the original dataset."

check_algorithm_success()