%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
import sys
sys.path.append("../")
from aif360.algorithms.inprocessing import GerryFairClassifier
from aif360.algorithms.inprocessing.gerryfair.clean import array_to_tuple
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult
from sklearn import svm
from sklearn import tree
from sklearn.kernel_ridge import KernelRidge
from sklearn import linear_model
from aif360.metrics import BinaryLabelDatasetMetric
from IPython.display import Image
import pickle
import matplotlib.pyplot as plt
# Load the preprocessed Adult data set (called with sub_samp=1000, balance=True).
data_set = load_preproc_data_adult(sub_samp=1000, balance=True)

# Hyperparameters forwarded to the GerryFair learner below.
C, gamma = 100, .005
max_iterations, print_flag = 10, True

# Build the classifier for the 'FP' fairness definition with heatmaps disabled.
fair_model = GerryFairClassifier(
    C=C,
    gamma=gamma,
    fairness_def='FP',
    max_iters=max_iterations,
    printflag=print_flag,
    heatmapflag=False,
)

# Fit on the loaded data, with early termination enabled.
fair_model.fit(data_set, early_termination=True)

# Predict on the same data. threshold=False is passed here; per the original
# note, a threshold in (0, 1) produces binary predictions instead.
dataset_yhat = fair_model.predict(data_set, threshold=False)
iteration: 1, error: 0.229, fairness violation: 0.05428400000000001, violated group size: 0.249 iteration: 2, error: 0.3645, fairness violation: 0.027142000000000006, violated group size: 0.249 iteration: 3, error: 0.4096666666666666, fairness violation: 0.01809466666666667, violated group size: 0.251 iteration: 4, error: 0.43225, fairness violation: 0.013571000000000003, violated group size: 0.249 iteration: 5, error: 0.44580000000000014, fairness violation: 0.0108568, violated group size: 0.251 iteration: 6, error: 0.4548333333333334, fairness violation: 0.009047333333333338, violated group size: 0.251 iteration: 7, error: 0.46128571428571435, fairness violation: 0.007754857142857144, violated group size: 0.251 iteration: 8, error: 0.466125, fairness violation: 0.006785500000000003, violated group size: 0.251 iteration: 9, error: 0.469888888888889, fairness violation: 0.006031555555555558, violated group size: 0.249
# Auditing: build a dataset metric on the original data, then evaluate the
# rich-subgroup disparity of the predicted labels for the 'FP' setting
# (presumably false-positive rate, matching fairness_def='FP' used when
# training -- confirm against the aif360 documentation).
gerry_metric = BinaryLabelDatasetMetric(data_set)
# The predicted labels are converted with array_to_tuple before being audited.
gamma_disparity = gerry_metric.rich_subgroup(array_to_tuple(dataset_yhat.labels), 'FP')
print(gamma_disparity)
0.0060315555555555565
# Iteration budget used as the default `iters` of multiple_classifiers_pareto.
# Set to 10 iterations for fast running of the notebook; set to >= 1000 when
# running real experiments.
pareto_iters = 10
# The function defined next tests learning with different hypothesis classes.
def multiple_classifiers_pareto(dataset, gamma_list=None, save_results=False, iters=pareto_iters):
    """Run a GerryFair Pareto sweep for several regression hypothesis classes.

    Four scikit-learn regressors are tried in turn: linear regression, linear
    SVR, a depth-3 decision tree and an RBF kernel ridge. Each one is wrapped
    in a ``GerryFairClassifier`` whose ``pareto`` method is called on
    ``dataset`` with ``gamma_list``. The errors and FP/FN violations it returns
    are collected per predictor and optionally pickled.

    Args:
        dataset: Dataset handed unchanged to ``GerryFairClassifier.pareto``.
        gamma_list: Gamma values to sweep. When ``None`` (the default) a fresh
            ``[0.002, 0.005, 0.01]`` list is created for this call, so no
            list object is shared between calls.
        save_results: When true, pickle the collected results dictionary to a
            ``results_dict_...pkl`` file in the current working directory.
        iters: Value passed as ``max_iters`` to every classifier.

    Returns:
        None. Results are only persisted when ``save_results`` is true.
    """
    if gamma_list is None:
        gamma_list = [0.002, 0.005, 0.01]

    # Hypothesis classes to compare, keyed by the label that is printed and
    # used as the key in the results dictionary.
    predictors = {
        'Linear': linear_model.LinearRegression(),
        'SVR': svm.LinearSVR(),
        'Tree': tree.DecisionTreeRegressor(max_depth=3),
        'Kernel': KernelRidge(alpha=1.0, gamma=1.0, kernel='rbf'),
    }

    results_dict = {}
    for name, predictor in predictors.items():
        print('Curr Predictor: {}'.format(name))
        fair_clf = GerryFairClassifier(C=100, printflag=True, gamma=1, predictor=predictor, max_iters=iters)
        # Kept from the original: the classifier is built with printflag=True
        # and then silenced, and max_iters is re-assigned after construction.
        fair_clf.printflag = False
        fair_clf.max_iters = iters
        errors, fp_violations, fn_violations = fair_clf.pareto(dataset, gamma_list)
        results_dict[name] = {
            'errors': errors,
            'fp_violations': fp_violations,
            'fn_violations': fn_violations,
        }

    if save_results:
        # NOTE(review): the gamma list appears twice in the file name; this
        # looks unintentional but is preserved so existing readers of the
        # file still find it under the same name.
        file_name = 'results_dict_' + str(gamma_list) + '_gammas' + str(gamma_list) + '.pkl'
        # Context manager guarantees the file is flushed and closed.
        with open(file_name, 'wb') as out_file:
            pickle.dump(results_dict, out_file)
# Run the sweep on the loaded data set using the function's default arguments
# (results are not saved because save_results defaults to False).
multiple_classifiers_pareto(data_set)
Curr Predictor: Linear Curr Predictor: SVR Curr Predictor: Tree Curr Predictor: Kernel