''' imports '''
import uuid
from collections import defaultdict
from itertools import product

import numpy as np
from sklearn.utils import resample

''' define options to run experiments on '''
options = {
    'target': ["AKI"],
    'cohort': ['SINAI'],
    'bootstrap': ['yes'],
    'n_iterations': [100],  # number of bootstrap iterations
    'n_ratio': [0.5],  # fraction of the data to resample in each bootstrap iteration
}
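# With a single value per option list, product(*options.values()) below yields
# exactly one combination, ('AKI', 'SINAI', 'yes', 100, 0.5); adding values to
# any list expands the experiment grid accordingly.

''' unpickle/pickle are assumed to be project helpers (not the stdlib module);
a minimal sketch under that assumption -- skip if they are defined elsewhere '''
import pickle as _pkl

def unpickle(path):
    # load a previously pickled object; return None if the file is missing or unreadable
    try:
        with open(path, 'rb') as f:
            return _pkl.load(f)
    except (OSError, EOFError, _pkl.UnpicklingError):
        return None

def pickle(obj, path):
    # persist an object to disk; return True on success so callers can check the result
    try:
        with open(path, 'wb') as f:
            _pkl.dump(obj, f)
        return True
    except OSError:
        return False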
''' retrieve previous experiments, if any '''
derivation_experiments = unpickle(derivation_path) or {}
validation_experiments = unpickle(validation_path) or {}
n_validation_experiments = 0
# filter validation experiments here if needed
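''' Timer is assumed to be a small timing context manager defined elsewhere;
a minimal sketch exposing the .interval attribute used below '''
import time

class Timer:
    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        self.interval = time.perf_counter() - self.start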
with Timer() as t:
    ''' iterate over every combination of options '''
    for combination in product(*options.values()):
        ''' for every derivation experiment saved in the pipeline '''
        for derivation_experiment in derivation_experiments.values():
            ''' initialize parameters '''
            exp_id = str(uuid.uuid1())
            params = dict(zip(options.keys(), combination))
            ''' load the data '''
            test = Load().execute(filename=filenames[params['cohort']])
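            # Load().execute is assumed to return the test cohort (e.g. a pandas
            # DataFrame); `filenames` is assumed to map cohort names to file paths.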
            params['model_features'] = derivation_experiment['parameters']['model_features']
            params['algorithm'] = derivation_experiment['parameters']['algorithm']
            print(f"Running experiment with the following parameters: {params}")
            ''' evaluate on the complete cohort using the existing pipeline '''
            validation_experiment = run_validation_experiment(params, test, **derivation_experiment['pipeline'])
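            # run_validation_experiment is assumed (from its usage below) to return
            # a dict with 'performance' -> 'discrimination' -> per-metric scores.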
            bs_performance = defaultdict(dict)  # plain dict factory so results remain picklable
            bs_experiments = []
            if params.get('bootstrap') == 'yes':
                metrics = ['precision', 'recall', 'f1-score', 'auc', 'dor']
                n_iterations = params['n_iterations']
                n_size = int(len(test) * params['n_ratio'])
                for i in range(n_iterations):
                    print(f"Validating bootstrapped sample #{i+1}")
                    bs_test = resample(test, n_samples=n_size)
                    bs_experiments.append(run_validation_experiment(params, bs_test, **derivation_experiment['pipeline']))
                for metric in metrics:
                    measurements = [exp['performance']['discrimination'][metric] for exp in bs_experiments]
                    bs_performance[metric]['mean'] = np.mean(measurements)
                    bs_performance[metric]['std'] = np.std(measurements)
                    bs_performance[metric]['ci'] = np.std(measurements) * 2  # ~95% CI half-width
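                    # NOTE: mean +/- 2*std is a normal-approximation ~95% interval; if a
                    # metric's bootstrap distribution is skewed, a percentile interval is
                    # a common alternative, e.g.:
                    #     ci_low, ci_high = np.percentile(measurements, [2.5, 97.5])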
            ''' save everything '''
            # validation_experiment['derivation_exp_id'] = derivation_experiment['exp_id']
            validation_experiment['parameters'] = params
            validation_experiment['performance']['bootstrap'] = bs_performance
            validation_experiment['exp_id'] = exp_id
            validation_experiments[exp_id] = validation_experiment
            n_validation_experiments += 1

print(f'Running {n_validation_experiments} validation experiments took {t.interval:.03f} sec.')
print(validation_experiments)
''' store everything '''
if pickle(validation_experiments, validation_path):
    print('Successfully saved.')