import pandas as pd
import scipy.io
from os import listdir
from sklearn.utils import shuffle
from sklearn import preprocessing
#from nilearn import plotting
#from proteus.io import util
from proteus.visu import sbp_visu
import glob,os
#import nibabel as nib
import pickle
from proteus.predic import high_confidence_at
import numpy as np
import pandas as pd
from proteus.predic import prediction
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn import metrics
import matplotlib.pyplot as plt
from matplotlib import cm
import seaborn as sns
import statsmodels.formula.api as smf
import statsmodels.api as sm
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from copy import deepcopy
from itertools import cycle
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp
from sklearn.metrics import average_precision_score
from sklearn.metrics import precision_recall_curve
/home/angela/anaconda3/envs/vcog_paper_py35/lib/python3.5/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release. from numpy.core.umath_tests import inner1d /home/angela/anaconda3/envs/vcog_paper_py35/lib/python3.5/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead. from pandas.core import datetools
def stats_mask(y_true, y_pred, mask_selected=None):
if mask_selected is None:
mask_selected = np.ones(y_pred.shape).astype(bool)
print('------------------------')
print('Ratio:', y_true[mask_selected].sum()/y_true.sum())
print('# : ', y_true[mask_selected].sum())
print('# true values: ',mask_selected.sum())
print('ACC : ', np.mean((y_true == y_pred)[mask_selected]))
def predic_stats(y_, y_pred, lr_decision):
# number of AD subjects
n_ad = sum(y_)
print('Total number of TARGET subjects: ', n_ad)
# number of CN subjects
n_cn = len(y_) - sum(y_)
print('Total number of NON-TARGET subjects: ', n_cn)
# number of subjects predicted as AD at stage 1
n_pos = sum(y_pred)
print('Stage 1 number of hits (true and false positives): ', n_pos)
# true positives at stage 1
n_pos_ad = sum(y_pred[y_.astype(bool)])
print('Stage 1 TRUE positives: ', n_pos_ad)
# false positives at stage 1
n_pos_cn = n_pos - n_pos_ad
print('Stage 1 FALSE positives: ', n_pos_cn)
# number of CN subjects not identified as positive (true negatives)
n_neg1_cn = n_cn - n_pos_cn
print('Stage 1 TRUE negatives: ', n_neg1_cn)
# number of all flagged HPC-AD subjects
n_flag = sum(y_pred[lr_decision>0])
print('Total number of flagged HPC-AD subjects: ', n_flag)
# number of flagged HPC-AD subjects who are actually AD (true positives)
y_pred_true = y_ + y_pred
y_pred_true = y_pred_true==2
n_flag_ad = sum(y_pred_true[lr_decision>0])
print('Number of flagged HPC-AD subjects that are TRUE positives: ', n_flag_ad)
# number of flagged HPC-AD subjects that are actually CN (false positives)
n_flag_cn = n_flag - n_flag_ad
print('Number of flagged HPC-AD subjects that are FALSE positives: ', n_flag_cn)
# number of CN subjects that were not flagged (true negatives)
n_neg_cn = n_cn - n_flag_cn
print('Number of true negatives: ', n_neg_cn)
print('#############################')
print('Stage 1 stats for TARGET vs NON-TARGET')
print('Precision for AD: ', n_pos_ad/(n_pos_ad + n_pos_cn))
prec = n_pos_ad/(n_pos_ad + n_pos_cn)
print('Recall (or sensitivity) for AD: ', n_pos_ad/n_ad)
sens = n_pos_ad/n_ad
print('Specificity: ', n_neg1_cn/n_cn)
spec = n_neg1_cn/n_cn
fp = (1-spec)*664
tp = sens*336
adj_prec = tp/(tp+fp)
print('Adjusted precision for 33.6% baseline rate: ', adj_prec)
print('Accuracy: ', (n_pos_ad + n_neg1_cn)/(n_ad + n_cn))
acc = (n_pos_ad + n_neg1_cn)/(n_ad + n_cn)
print('#############################')
print('Stage 2 stats for TARGET vs NON-TARGET')
print('Precision for HPC-AD: ', n_flag_ad/n_flag)
prec_2 = n_flag_ad/n_flag
print('Recall (or sensitivity) for HPC-AD: ', n_flag_ad/n_ad)
sens_2 = n_flag_ad/n_ad
print('Specificity: ', n_neg_cn/n_cn)
spec_2 = n_neg_cn/n_cn
fp_2 = (1-spec_2)*664
tp_2 = sens_2*336
adj_prec_2 = tp_2/(tp_2 + fp_2)
print('Adjusted precision for 33.6% baseline rate: ', adj_prec_2)
print('Accuracy: ', (n_flag_ad + n_neg_cn)/(n_ad + n_cn))
acc_2 = (n_flag_ad + n_neg_cn)/(n_ad + n_cn)
return sens, spec, prec, acc, sens_2, spec_2, prec_2, acc_2
sns.set(font_scale=2)
sns.set_style("white")
#sns.set_context("paper")
#sns.set_palette("colorblind")
#sns.set_palette("GnBu_d")
#sns.set_palette(sns.cubehelix_palette(n_colors=8))
#sns.set_palette(sns.color_palette("BrBG", 6))
cpal = ["#F0DFB2", "#CFA255", "#995D12", "#B3E2DB", "#58B0A6", "#0D7068"]
sns.set_palette(cpal)
path_results = '/home/angela/Desktop/vcog_paper/gigascience/third_submission/roc/cog/'
np.random.seed(1)
#np.random.RandomState(1)
long_data = pd.read_csv('/home/angela/Documents/adni_csv/adnimerge_upenn_unw_av45_neurobat.csv')
/home/angela/anaconda3/envs/vcog_paper_py35/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (101) have mixed types. Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
data = pd.read_csv('/home/angela/Desktop/vcog_paper/adni1_vbm_adcn_subtypes_20171209/7clus/adni1_model_weights.csv')
data.dropna(subset=['sub1','age_scan','gender','mean_gm','tiv', 'ADAS13','ADNI_MEM','ADNI_EF','BNTTOTAL','CLOCKSCOR'],
inplace=True)
data = data[['RID','age_scan','gender','mean_gm','tiv',
'ADAS13','ADNI_MEM','ADNI_EF','BNTTOTAL','CLOCKSCOR',
'sub1','sub2','sub3','sub4','sub5','sub6','sub7',
'ABETA','TAU','conv_2_ad','AD','MCI','CN','APOE4_bin','DX']]
# Mask of the CN and AD subjects only
mask_cnad = data.loc[:,['CN','AD']].values.sum(1).astype(bool)
#reload(high_confidence)
scalerX = preprocessing.StandardScaler()
scaler = preprocessing.StandardScaler()
x_ = data.iloc[mask_cnad,data.columns.get_loc("ADAS13"):data.columns.get_loc("CLOCKSCOR")+1].values
#x_ = scalerX.fit_transform(x_)
y_ = data[['AD']].values.ravel()[mask_cnad]
confounds = data[['gender','age_scan','mean_gm','tiv']].values[mask_cnad,:]
#confounds = data[['sex','age_r']].values[mask_cnad,:]
#confounds[:, 1:] = scaler.fit_transform(confounds[:, 1:])
#confounds[:, 0] = preprocessing.binarize(confounds[:, 0].reshape(-1, 1), threshold=1)[:, 0]
#crm = prediction.ConfoundsRm(confounds, x_)
#x_ = crm.transform(confounds, x_)
x_ = scaler.fit_transform(np.hstack((x_,confounds)))
#x_ = np.hstack((x_,confounds))
x_.shape, y_.shape, confounds.shape
((370, 9), (370,), (370, 4))
scores_ad_cn=[]
scores_s2 = []
ad_precision = []
cn_precision = []
ad_recall = []
cn_recall = []
ad_f1_score = []
cn_f1_score = []
s1_spec = []
s1_sens = []
s1_prec = []
s1_acc = []
s2_spec = []
s2_sens = []
s2_prec = []
s2_acc = []
skf = StratifiedKFold(n_splits=3)
for train_index, val_index in skf.split(x_,y_):
X_training, X_val = x_[train_index], x_[val_index]
y_training, y_val = y_[train_index], y_[val_index]
hpc = high_confidence_at.TwoStagesPrediction(
n_iter=500,
shuffle_test_split=0.5,
min_gamma=.99,
thresh_ratio=0.1)
hpc.fit(X_training, X_training, y_training)
_, dic_results = hpc.predict(X_val, X_val)
# test in validation sample
acc = metrics.accuracy_score(y_val, (dic_results['s1df'][:,0]>0).astype(float))
tmp_mask = (dic_results['s2df'][:,1]>0)
acc_s2 = metrics.accuracy_score(y_val[tmp_mask], (dic_results['s1df'][:,0]>0).astype(float)[tmp_mask])
scores_ad_cn.append(acc)
scores_s2.append(acc_s2)
print('Classifying AD vs CN...')
print((dic_results['s1df'][:,0]>0).astype(float))
y_pred = (dic_results['s1df'][:,0]>0).astype(float)
lr_decision = dic_results['s2df'][:,1]
# BASE SVM PERFORMANCE
ad_p = metrics.precision_score(y_val, y_pred)
ad_precision.append(ad_p)
cn_p = metrics.precision_score(y_val, y_pred, pos_label=0)
cn_precision.append(cn_p)
ad_r = metrics.recall_score(y_val, y_pred)
ad_recall.append(ad_r)
cn_r = metrics.recall_score(y_val, y_pred, pos_label=0)
cn_recall.append(cn_r)
ad_f1 = metrics.f1_score(y_val, y_pred)
ad_f1_score.append(ad_f1)
cn_f1 = metrics.f1_score(y_val, y_pred, pos_label=0)
cn_f1_score.append(cn_f1)
sens, spec, prec, acc, sens_2, spec_2, prec_2, acc_2 = predic_stats(y_val, y_pred, lr_decision)
s1_spec.append(spec)
s1_sens.append(sens)
s1_prec.append(prec)
s1_acc.append(acc)
s2_spec.append(spec_2)
s2_sens.append(sens_2)
s2_prec.append(prec_2)
s2_acc.append(acc_2)
Stage 1 Proba: [1. 1. 1. 1. 1. 0.98755187 1. 1. 1. 1. 0.87649402 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99595142 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.9958159 1. 0.98387097 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.84063745 1. 1. 0.06995885 1. 1. 1. 1. 1. 0.99242424 0.26848249 1. 1. 1. 0.93562232 0.93951613 1. 0.06934307 1. 0.61666667 0.30379747 1. 1. 1. 1. 1. 1. 0.8458498 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.91304348 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.95801527 1. 1. 0.50420168 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.98418972 1. 1. 1. 0.67916667 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.98275862 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99615385 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ] Average hm score 0.9065040650406504 Stage 2 Adjusted gamma: 1.0 Adjusted gamma: 1.0 Classifying AD vs CN... [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1.] Total number of TARGET subjects: 55.0 Total number of NON-TARGET subjects: 69.0 Stage 1 number of hits (true and false positives): 53.0 Stage 1 TRUE positives: 52.0 Stage 1 FALSE positives: 1.0 Stage 1 TRUE negatives: 68.0 Total number of flagged HPC-AD subjects: 48.0 Number of flagged HPC-AD subjects that are TRUE positives: 48 Number of flagged HPC-AD subjects that are FALSE positives: 0.0 Number of true negatives: 69.0 ############################# Stage 1 stats for TARGET vs NON-TARGET Precision for AD: 0.9811320754716981 Recall (or sensitivity) for AD: 0.9454545454545454 Specificity: 0.9855072463768116 Adjusted precision for 33.6% baseline rate: 0.9705978964453406 Accuracy: 0.967741935483871 ############################# Stage 2 stats for TARGET vs NON-TARGET Precision for HPC-AD: 1.0 Recall (or sensitivity) for HPC-AD: 0.8727272727272727 Specificity: 1.0 Adjusted precision for 33.6% baseline rate: 1.0 Accuracy: 0.9435483870967742 Stage 1 Proba: [0.47808765 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99173554 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99595142 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.07630522 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.9922179 1. 1. 1. 1. 0.66666667 1. 1. 1. 0.98015873 1. 1. 1. 1. 1. 1. 0.27667984 1. 1. 0.76226415 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.8 1. 0.02521008 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0.34008097 1. 1. 0.22510823 1. 1. 0.1902834 0.83921569 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.70881226 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99588477 1. 1. 1. 0.66945607 1. 1. 1. 1. 1. 0.9916318 1. 0.99570815 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.90041494 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.9561753 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.80237154 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ] Average hm score 0.8987854251012146 Stage 2 Adjusted gamma: 1.0 Adjusted gamma: 1.0 Classifying AD vs CN... [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1.] Total number of TARGET subjects: 55.0 Total number of NON-TARGET subjects: 68.0 Stage 1 number of hits (true and false positives): 52.0 Stage 1 TRUE positives: 52.0 Stage 1 FALSE positives: 0.0 Stage 1 TRUE negatives: 68.0 Total number of flagged HPC-AD subjects: 45.0 Number of flagged HPC-AD subjects that are TRUE positives: 45 Number of flagged HPC-AD subjects that are FALSE positives: 0.0 Number of true negatives: 68.0 ############################# Stage 1 stats for TARGET vs NON-TARGET Precision for AD: 1.0 Recall (or sensitivity) for AD: 0.9454545454545454 Specificity: 1.0 Adjusted precision for 33.6% baseline rate: 1.0 Accuracy: 0.975609756097561 ############################# Stage 2 stats for TARGET vs NON-TARGET Precision for HPC-AD: 1.0 Recall (or sensitivity) for HPC-AD: 0.8181818181818182 Specificity: 1.0 Adjusted precision for 33.6% baseline rate: 1.0 Accuracy: 0.9186991869918699 Stage 1 Proba: [0.54581673 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.91696751 0.97107438 0.99578059 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99190283 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.76893939 1. 0.65863454 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99610895 1. 1. 1. 1. 0.61382114 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.24505929 1. 1. 0.23018868 0.044 0.97142857 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.9245283 1. 0.81512605 1. 1. 1. 1. 1. 1. 0.99264706 1. 1. 1. 1. 1. 1. 0.10196078 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99579832 1. 1. 1. 1. 0.99095023 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99137931 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99601594 1. 1. 1. 0.99166667 1. 0.98367347 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.48790323 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.675 1. 1. 1. 1. 1. 0.98076923 1. 1. 0. 1. 1. 1. 1. 0.925 1. 1. 1. 1. ] Average hm score 0.8825910931174089 Stage 2 Adjusted gamma: 1.0 Adjusted gamma: 1.0 Classifying AD vs CN... [0. 0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 0. 1. 0. 0. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] Total number of TARGET subjects: 55.0 Total number of NON-TARGET subjects: 68.0 Stage 1 number of hits (true and false positives): 58.0 Stage 1 TRUE positives: 55.0 Stage 1 FALSE positives: 3.0 Stage 1 TRUE negatives: 65.0 Total number of flagged HPC-AD subjects: 51.0 Number of flagged HPC-AD subjects that are TRUE positives: 51 Number of flagged HPC-AD subjects that are FALSE positives: 0.0 Number of true negatives: 68.0 ############################# Stage 1 stats for TARGET vs NON-TARGET Precision for AD: 0.9482758620689655 Recall (or sensitivity) for AD: 1.0 Specificity: 0.9558823529411765 Adjusted precision for 33.6% baseline rate: 0.9198067632850243 Accuracy: 0.975609756097561 ############################# Stage 2 stats for TARGET vs NON-TARGET Precision for HPC-AD: 1.0 Recall (or sensitivity) for HPC-AD: 0.9272727272727272 Specificity: 1.0 Adjusted precision for 33.6% baseline rate: 1.0 Accuracy: 0.967479674796748
print('Stage 1')
print('Mean sensitivity: ', np.mean(s1_sens))
print('Mean specificity: ', np.mean(s1_spec))
print('Mean precision: ', np.mean(s1_prec))
print('Mean accuracy: ', np.mean(s1_acc))
print('#'*10)
print('Stage 2')
print('Mean sensitivity: ', np.mean(s2_sens))
print('Mean specificity: ', np.mean(s2_spec))
print('Mean precision: ', np.mean(s2_prec))
print('Mean accuracy: ', np.mean(s2_acc))
Stage 1 Mean sensitivity: 0.9636363636363635 Mean specificity: 0.9804631997726627 Mean precision: 0.9764693125135545 Mean accuracy: 0.972987149226331 ########## Stage 2 Mean sensitivity: 0.8727272727272727 Mean specificity: 1.0 Mean precision: 1.0 Mean accuracy: 0.9432424162951308
#reload(high_confidence)
hpc = high_confidence_at.TwoStagesPrediction(
n_iter=500,
shuffle_test_split=0.5,
min_gamma=.99,
thresh_ratio=0.1)
hpc.fit(x_, x_, y_)
Stage 1 Proba: [0.70416667 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.9766537 0.95454545 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.97165992 1. 0.36 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.47983871 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.63052209 1. 1. 0.12840467 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.97233202 1. 0.68379447 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.58364312 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99578059 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.97647059 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.49367089 1. 1. 0.29365079 1. 1. 1. 1. 1. 1. 0.1042471 1. 1. 1. 1. 1. 1. 0.01626016 1. 0.82170543 0.61316872 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.95256917 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.96124031 1. 1. 0.92765957 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.94488189 1. 1. 1. 1. 1. 0.99111111 1. 0.99601594 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99607843 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.99606299 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ] Average hm score 0.918918918918919 Stage 2 Adjusted gamma: 1.0 Adjusted gamma: 1.0
array_results, dic_results = hpc.predict(x_, x_)
# Level 1
print('Level 1')
stats_mask(y_, (dic_results['s1df'][:,0]>0).astype(float))
print('Level 2')
stats_mask(y_, (dic_results['s1df'][:,0]>0).astype(float), dic_results['s2df'][:,1]>0)
#stats_mask(dic_results['s2df'][:,2]>0)
#stats_mask(dic_results['s2df'][:,3]>0)
Level 1 ------------------------ Ratio: 1.0 # : 165.0 # true values: 370 ACC : 0.9756756756756757 Level 2 ------------------------ Ratio: 0.9090909090909091 # : 150.0 # true values: 150 ACC : 1.0
y_pred = (dic_results['s1df'][:,0]>0).astype(float)
lr_decision = dic_results['s2df'][:,1]
print('Stage 1 stats for AD vs CN')
print(metrics.classification_report(y_, y_pred))
Stage 1 stats for AD vs CN precision recall f1-score support 0.0 0.98 0.98 0.98 205 1.0 0.98 0.97 0.97 165 avg / total 0.98 0.98 0.98 370
predic_stats(y_, y_pred, lr_decision)
Total number of TARGET subjects: 165.0 Total number of NON-TARGET subjects: 205.0 Stage 1 number of hits (true and false positives): 164.0 Stage 1 TRUE positives: 160.0 Stage 1 FALSE positives: 4.0 Stage 1 TRUE negatives: 201.0 Total number of flagged HPC-AD subjects: 150.0 Number of flagged HPC-AD subjects that are TRUE positives: 150 Number of flagged HPC-AD subjects that are FALSE positives: 0.0 Number of true negatives: 205.0 ############################# Stage 1 stats for TARGET vs NON-TARGET Precision for AD: 0.975609756097561 Recall (or sensitivity) for AD: 0.9696969696969697 Specificity: 0.9804878048780488 Adjusted precision for 33.6% baseline rate: 0.9617559586143343 Accuracy: 0.9756756756756757 ############################# Stage 2 stats for TARGET vs NON-TARGET Precision for HPC-AD: 1.0 Recall (or sensitivity) for HPC-AD: 0.9090909090909091 Specificity: 1.0 Adjusted precision for 33.6% baseline rate: 1.0 Accuracy: 0.9594594594594594
(0.9696969696969697, 0.9804878048780488, 0.975609756097561, 0.9756756756756757, 0.9090909090909091, 1.0, 1.0, 0.9594594594594594)
base = high_confidence_at.BaseSvc()
base.fit(x_, y_)
y_predicted = base.predict(x_)
y_score = base.decision_function(x_)
y_score.shape
(370,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_b = dict()
tpr_b = dict()
roc_auc_b = dict()
for i in range(n_classes):
fpr_b[i], tpr_b[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_b[i] = auc(fpr_b[i], tpr_b[i])
# Compute micro-average ROC curve and ROC area
fpr_b["micro"], tpr_b["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_b["micro"] = auc(fpr_b["micro"], tpr_b["micro"])
average_precision_b = average_precision_score(y_true, y_score)
print('Average precision-recall score: {0:0.2f}'.format(
average_precision_b))
precision_b, recall_b, _ = precision_recall_curve(y_true, y_score)
Average precision-recall score: 1.00
y_true = y_.astype(int)
y_true = label_binarize(y_, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(lr_decision, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_h = dict()
tpr_h = dict()
roc_auc_h = dict()
for i in range(n_classes):
fpr_h[i], tpr_h[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_h[i] = auc(fpr_h[i], tpr_h[i])
# Compute micro-average ROC curve and ROC area
fpr_h["micro"], tpr_h["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_h["micro"] = auc(fpr_h["micro"], tpr_h["micro"])
average_precision_h = average_precision_score(y_true, y_score)
print('Average precision-recall score: {0:0.2f}'.format(
average_precision_h))
precision_h, recall_h, _ = precision_recall_curve(y_true, y_score)
Average precision-recall score: 1.00
svm_param_grid=dict(C=(np.logspace(-2, 1, 15)))
clf_svm = SVC(kernel='rbf', class_weight='balanced', decision_function_shape='ovr', random_state=1)
grclf_svm = GridSearchCV(clf_svm, param_grid=svm_param_grid,
cv=StratifiedShuffleSplit(n_splits=50, test_size=.2, random_state=1))
grclf_svm.fit(x_, y_)
y_predicted = grclf_svm.predict(x_)
y_score = grclf_svm.decision_function(x_)
y_score.shape
(370,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_svcrbf = dict()
tpr_svcrbf = dict()
roc_auc_svcrbf = dict()
for i in range(n_classes):
fpr_svcrbf[i], tpr_svcrbf[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_svcrbf[i] = auc(fpr_svcrbf[i], tpr_svcrbf[i])
# Compute micro-average ROC curve and ROC area
fpr_svcrbf["micro"], tpr_svcrbf["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_svcrbf["micro"] = auc(fpr_svcrbf["micro"], tpr_svcrbf["micro"])
k_range = list(range(3,7))
weight_opt = ["uniform", "distance"]
knn_param_grid = dict(n_neighbors = k_range, weights = weight_opt)
clf_knn = KNeighborsClassifier(algorithm='auto')
grclf_knn = GridSearchCV(clf_knn, param_grid=knn_param_grid,
cv=StratifiedShuffleSplit(n_splits=50, test_size=.2, random_state=1))
grclf_knn.fit(x_, y_)
y_predicted = grclf_knn.predict(x_)
#y_score = clf.decision_function(x_)
y_score = grclf_knn.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(370,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_knn = dict()
tpr_knn = dict()
roc_auc_knn = dict()
for i in range(n_classes):
fpr_knn[i], tpr_knn[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_knn[i] = auc(fpr_knn[i], tpr_knn[i])
# Compute micro-average ROC curve and ROC area
fpr_knn["micro"], tpr_knn["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_knn["micro"] = auc(fpr_knn["micro"], tpr_knn["micro"])
n_features = x_.shape[1]
rf_param_grid = [
{'n_estimators': [10, 25], 'max_features': [5, n_features],
'max_depth': [10, 50, None], 'bootstrap': [True, False]}
]
clf_rf = RandomForestClassifier(random_state=1)
grclf_rf = GridSearchCV(clf_rf, param_grid=rf_param_grid,
cv=StratifiedShuffleSplit(n_splits=50, test_size=.2, random_state=1))
grclf_rf.fit(x_, y_)
y_predicted = grclf_rf.predict(x_)
#y_score = clf.decision_function(x_)
y_score = grclf_rf.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(370,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_rf = dict()
tpr_rf = dict()
roc_auc_rf = dict()
for i in range(n_classes):
fpr_rf[i], tpr_rf[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_rf[i] = auc(fpr_rf[i], tpr_rf[i])
# Compute micro-average ROC curve and ROC area
fpr_rf["micro"], tpr_rf["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_rf["micro"] = auc(fpr_rf["micro"], tpr_rf["micro"])
clf_gnb = GaussianNB()
clf_gnb.fit(x_, y_)
y_predicted = clf_gnb.predict(x_)
#y_score = clf.decision_function(x_)
y_score = clf_gnb.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(370,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_nb = dict()
tpr_nb = dict()
roc_auc_nb = dict()
for i in range(n_classes):
fpr_nb[i], tpr_nb[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_nb[i] = auc(fpr_nb[i], tpr_nb[i])
# Compute micro-average ROC curve and ROC area
fpr_nb["micro"], tpr_nb["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_nb["micro"] = auc(fpr_nb["micro"], tpr_nb["micro"])
#plt.figure()
fig, ax = plt.subplots()
fig.set_size_inches(10,8)
lw = 4
plt.rc('xtick', labelsize=40)
plt.rc('ytick', labelsize=40)
plt.plot(fpr_svcrbf[0], tpr_svcrbf[0], color='green',
lw=lw, label='RBF SVM (AUC=%0.3f)' % roc_auc_svcrbf[0])
plt.plot(fpr_knn[0], tpr_knn[0], color='pink',
lw=lw, label='KNN (AUC=%0.3f)' % roc_auc_knn[0])
plt.plot(fpr_rf[0], tpr_rf[0], color='brown',
lw=lw, label='RF (AUC=%0.3f)' % roc_auc_rf[0])
plt.plot(fpr_nb[0], tpr_nb[0], color='orange',
lw=lw, label='GNB (AUC=%0.3f)' % roc_auc_nb[0])
plt.plot(fpr_b[0], tpr_b[0], color='blue',
lw=lw, label='Base (AUC=%0.3f)' % roc_auc_b[0])
plt.plot(fpr_h[0], tpr_h[0], color='red',
lw=lw, label='HPS (AUC=%0.3f)' % roc_auc_h[0])
plt.plot([0, 1], [0, 1], color='grey', lw=lw, linestyle='--')
plt.xlim([-0.05, 1.00])
plt.ylim([0.0, 1.05])
plt.xlabel('FPR', fontdict={'size': 40})
plt.ylabel('TPR', fontdict={'size': 40})
plt.title('ADNI1 AD vs CN', fontdict={'size': 40})
plt.legend(loc="lower right", prop={'size': 25})
plt.show()
fig.savefig(path_results + 'adni1_ad_roc_multi.pdf', bbox_inches='tight')
# load the data
adni1_mci = pd.read_csv('/home/angela/Desktop/vcog_paper/adni1_vbm_adcn_subtypes_20171209/7clus/adni1_mci_bl_demog_weights.csv')
mask_mci = adni1_mci.loc[:,'MCI'].values.astype(bool)
adni1_mci = adni1_mci.iloc[mask_mci]
adni1_mci.dropna(subset=['sub1','age_scan','gender','mean_gm','tiv',
'ADAS13','ADNI_MEM','ADNI_EF','BNTTOTAL','CLOCKSCOR'],inplace=True)
adni1_mci = adni1_mci[['RID','age_scan','gender','mean_gm','tiv',
'ADAS13','ADNI_MEM','ADNI_EF','BNTTOTAL','CLOCKSCOR',
'sub1','sub2','sub3','sub4','sub5','sub6','sub7',
'ABETA','TAU','conv_2_ad','AD','MCI','CN','APOE4_bin','DX','Month_conv']]
len(adni1_mci)
235
x_ = adni1_mci.iloc[:,adni1_mci.columns.get_loc("ADAS13"):adni1_mci.columns.get_loc("CLOCKSCOR")+1].values
#x_ = scalerX.transform(x_)
y_ = adni1_mci['conv_2_ad'].values.ravel()
confounds = adni1_mci[['gender','age_scan','mean_gm','tiv']].values
#confounds = data[['sex','age_r']].values[mask_mci,:]
#confounds[:, 1:] = scaler.transform(confounds[:, 1:])
#confounds[:, 0] = preprocessing.binarize(confounds[:, 0].reshape(-1, 1), threshold=1)[:, 0]
#confounds = scaler.transform(confounds)
#x_ = crm.transform(confounds, x_)
x_ = scaler.transform(np.hstack((x_,confounds)))
x_.shape, y_.shape, confounds.shape
((235, 9), (235,), (235, 4))
array_results, dic_results = hpc.predict(x_, x_)
# Level 1
print('Level 1')
stats_mask(y_, (dic_results['s1df'][:,0]>0).astype(float))
print('Level 2')
stats_mask(y_, (dic_results['s1df'][:,0]>0).astype(float), dic_results['s2df'][:,1]>0)
#stats_mask(dic_results['s2df'][:,2]>0)
#stats_mask(dic_results['s2df'][:,3]>0)
Level 1 ------------------------ Ratio: 1.0 # : 147.0 # true values: 235 ACC : 0.7617021276595745 Level 2 ------------------------ Ratio: 0.6462585034013606 # : 95.0 # true values: 106 ACC : 0.8962264150943396
y_pred = (dic_results['s1df'][:,0]>0).astype(float)
lr_decision = dic_results['s2df'][:,1]
predic_stats(y_, y_pred, lr_decision)
Total number of TARGET subjects: 147.0 Total number of NON-TARGET subjects: 88.0 Stage 1 number of hits (true and false positives): 143.0 Stage 1 TRUE positives: 117.0 Stage 1 FALSE positives: 26.0 Stage 1 TRUE negatives: 62.0 Total number of flagged HPC-AD subjects: 106.0 Number of flagged HPC-AD subjects that are TRUE positives: 95 Number of flagged HPC-AD subjects that are FALSE positives: 11.0 Number of true negatives: 77.0 ############################# Stage 1 stats for TARGET vs NON-TARGET Precision for AD: 0.8181818181818182 Recall (or sensitivity) for AD: 0.7959183673469388 Specificity: 0.7045454545454546 Adjusted precision for 33.6% baseline rate: 0.5768390386016025 Accuracy: 0.7617021276595745 ############################# Stage 2 stats for TARGET vs NON-TARGET Precision for HPC-AD: 0.8962264150943396 Recall (or sensitivity) for HPC-AD: 0.6462585034013606 Specificity: 0.875 Adjusted precision for 33.6% baseline rate: 0.723465016658734 Accuracy: 0.7319148936170212
(0.7959183673469388, 0.7045454545454546, 0.8181818181818182, 0.7617021276595745, 0.6462585034013606, 0.875, 0.8962264150943396, 0.7319148936170212)
#base = high_confidence_at.BaseSvc()
#base.fit(x_, y_)
y_predicted = base.predict(x_)
y_score = base.decision_function(x_)
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_b = dict()
tpr_b = dict()
roc_auc_b = dict()
for i in range(n_classes):
fpr_b[i], tpr_b[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_b[i] = auc(fpr_b[i], tpr_b[i])
# Compute micro-average ROC curve and ROC area
fpr_b["micro"], tpr_b["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_b["micro"] = auc(fpr_b["micro"], tpr_b["micro"])
average_precision_b = average_precision_score(y_true, y_score)
print('Average precision-recall score: {0:0.2f}'.format(
average_precision_b))
precision_b, recall_b, _ = precision_recall_curve(y_true, y_score)
Average precision-recall score: 0.89
y_true = y_.astype(int)
y_true = label_binarize(y_, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(lr_decision, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_h = dict()
tpr_h = dict()
roc_auc_h = dict()
for i in range(n_classes):
fpr_h[i], tpr_h[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_h[i] = auc(fpr_h[i], tpr_h[i])
# Compute micro-average ROC curve and ROC area
fpr_h["micro"], tpr_h["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_h["micro"] = auc(fpr_h["micro"], tpr_h["micro"])
y_predicted = grclf_svm.predict(x_)
y_score = grclf_svm.decision_function(x_)
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_svcrbf = dict()
tpr_svcrbf = dict()
roc_auc_svcrbf = dict()
for i in range(n_classes):
fpr_svcrbf[i], tpr_svcrbf[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_svcrbf[i] = auc(fpr_svcrbf[i], tpr_svcrbf[i])
# Compute micro-average ROC curve and ROC area
fpr_svcrbf["micro"], tpr_svcrbf["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_svcrbf["micro"] = auc(fpr_svcrbf["micro"], tpr_svcrbf["micro"])
y_predicted = grclf_knn.predict(x_)
y_score = grclf_knn.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_knn = dict()
tpr_knn = dict()
roc_auc_knn = dict()
for i in range(n_classes):
fpr_knn[i], tpr_knn[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_knn[i] = auc(fpr_knn[i], tpr_knn[i])
# Compute micro-average ROC curve and ROC area
fpr_knn["micro"], tpr_knn["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_knn["micro"] = auc(fpr_knn["micro"], tpr_knn["micro"])
y_predicted = grclf_rf.predict(x_)
y_score = grclf_rf.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_rf = dict()
tpr_rf = dict()
roc_auc_rf = dict()
for i in range(n_classes):
fpr_rf[i], tpr_rf[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_rf[i] = auc(fpr_rf[i], tpr_rf[i])
# Compute micro-average ROC curve and ROC area
fpr_rf["micro"], tpr_rf["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_rf["micro"] = auc(fpr_rf["micro"], tpr_rf["micro"])
y_predicted = clf_gnb.predict(x_)
y_score = clf_gnb.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_nb = dict()
tpr_nb = dict()
roc_auc_nb = dict()
for i in range(n_classes):
fpr_nb[i], tpr_nb[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_nb[i] = auc(fpr_nb[i], tpr_nb[i])
# Compute micro-average ROC curve and ROC area
fpr_nb["micro"], tpr_nb["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_nb["micro"] = auc(fpr_nb["micro"], tpr_nb["micro"])
#plt.figure()
fig, ax = plt.subplots()
fig.set_size_inches(10,8)
lw = 4
plt.rc('xtick', labelsize=40)
plt.rc('ytick', labelsize=40)
plt.plot(fpr_svcrbf[0], tpr_svcrbf[0], color='green',
lw=lw, label='RBF SVM (AUC=%0.3f)' % roc_auc_svcrbf[0])
plt.plot(fpr_knn[0], tpr_knn[0], color='pink',
lw=lw, label='KNN (AUC=%0.3f)' % roc_auc_knn[0])
plt.plot(fpr_rf[0], tpr_rf[0], color='brown',
lw=lw, label='RF (AUC=%0.3f)' % roc_auc_rf[0])
plt.plot(fpr_nb[0], tpr_nb[0], color='orange',
lw=lw, label='GNB (AUC=%0.3f)' % roc_auc_nb[0])
plt.plot(fpr_b[0], tpr_b[0], color='blue',
lw=lw, label='Base (AUC=%0.3f)' % roc_auc_b[0])
plt.plot(fpr_h[0], tpr_h[0], color='red',
lw=lw, label='HPS (AUC=%0.3f)' % roc_auc_h[0])
plt.plot([0, 1], [0, 1], color='grey', lw=lw, linestyle='--')
plt.xlim([-0.05, 1.00])
plt.ylim([0.0, 1.05])
plt.xlabel('FPR', fontdict={'size': 40})
plt.ylabel('TPR', fontdict={'size': 40})
plt.title('ADNI1 pMCI vs sMCI', fontdict={'size': 40})
plt.legend(loc="lower right", prop={'size': 25})
#ax.legend(loc='center left', bbox_to_anchor=(1,0.5), prop={'size': 30})
plt.show()
fig.savefig(path_results + 'adni1_mci_roc_multi.pdf', bbox_inches='tight')
adni2_df = pd.read_csv('/home/angela/Desktop/vcog_paper/adni1_vbm_adcn_subtypes_20171209/7clus/adni2_model_weights.csv')
adni2_df.drop(adni2_df[adni2_df.RID < 2000].index,inplace=True)
# get rid of NaNs
adni2_df.dropna(axis=0,how='any',subset=['sub1','gender','age_scan','mean_gm','tiv','conv_2_ad',
'ADAS13','ADNI_MEM','ADNI_EF','BNTTOTAL','CLOCKSCOR'],inplace=True)
adni2_df = adni2_df[['RID','age_scan','gender','mean_gm','tiv',
'ADAS13','ADNI_MEM','ADNI_EF','BNTTOTAL','CLOCKSCOR',
'sub1','sub2','sub3','sub4','sub5','sub6','sub7',
'ABETA','TAU','conv_2_ad','AD','MCI','CN','APOE4_bin','DX']]
# Mask of the AD and CN subjects only
mask_cnad = adni2_df.loc[:,['CN','AD']].values.sum(1).astype(bool)
x_ = adni2_df.iloc[mask_cnad,adni2_df.columns.get_loc("ADAS13"):adni2_df.columns.get_loc("CLOCKSCOR")+1].values
#x_ = scalerX.transform(x_)
y_ = adni2_df[['AD']].values.ravel()[mask_cnad]
confounds = adni2_df[['gender','age_scan','mean_gm','tiv']].values[mask_cnad,:]
#confounds = data[['sex','age_r']].values[mask_mci,:]
#confounds[:, 1:] = scaler.transform(confounds[:, 1:])
#confounds[:, 0] = preprocessing.binarize(confounds[:, 0].reshape(-1, 1), threshold=1)[:, 0]
#confounds = scaler.transform(confounds)
#x_ = crm.transform(confounds, x_)
x_ = scaler.transform(np.hstack((x_,confounds)))
x_.shape, y_.shape, confounds.shape
((276, 9), (276,), (276, 4))
array_results, dic_results = hpc.predict(x_, x_)
# Level 1
print('Level 1')
stats_mask(y_, (dic_results['s1df'][:,0]>0).astype(float))
print('Level 2')
stats_mask(y_, (dic_results['s1df'][:,0]>0).astype(float), dic_results['s2df'][:,1]>0)
#stats_mask(dic_results['s2df'][:,2]>0)
#stats_mask(dic_results['s2df'][:,3]>0)
Level 1 ------------------------ Ratio: 1.0 # : 88.0 # true values: 276 ACC : 0.9565217391304348 Level 2 ------------------------ Ratio: 0.8863636363636364 # : 78.0 # true values: 79 ACC : 0.9873417721518988
y_pred = (dic_results['s1df'][:,0]>0).astype(float)
lr_decision = dic_results['s2df'][:,1]
predic_stats(y_, y_pred, lr_decision)
Total number of TARGET subjects: 88.0 Total number of NON-TARGET subjects: 188.0 Stage 1 number of hits (true and false positives): 90.0 Stage 1 TRUE positives: 83.0 Stage 1 FALSE positives: 7.0 Stage 1 TRUE negatives: 181.0 Total number of flagged HPC-AD subjects: 79.0 Number of flagged HPC-AD subjects that are TRUE positives: 78 Number of flagged HPC-AD subjects that are FALSE positives: 1.0 Number of true negatives: 187.0 ############################# Stage 1 stats for TARGET vs NON-TARGET Precision for AD: 0.9222222222222223 Recall (or sensitivity) for AD: 0.9431818181818182 Specificity: 0.9627659574468085 Adjusted precision for 33.6% baseline rate: 0.9276315789473684 Accuracy: 0.9565217391304348 ############################# Stage 2 stats for TARGET vs NON-TARGET Precision for HPC-AD: 0.9873417721518988 Recall (or sensitivity) for HPC-AD: 0.8863636363636364 Specificity: 0.9946808510638298 Adjusted precision for 33.6% baseline rate: 0.9882796955031514 Accuracy: 0.9601449275362319
(0.9431818181818182, 0.9627659574468085, 0.9222222222222223, 0.9565217391304348, 0.8863636363636364, 0.9946808510638298, 0.9873417721518988, 0.9601449275362319)
y_predicted = base.predict(x_)
y_score = base.decision_function(x_)
y_score.shape
(276,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_b = dict()
tpr_b = dict()
roc_auc_b = dict()
for i in range(n_classes):
fpr_b[i], tpr_b[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_b[i] = auc(fpr_b[i], tpr_b[i])
# Compute micro-average ROC curve and ROC area
fpr_b["micro"], tpr_b["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_b["micro"] = auc(fpr_b["micro"], tpr_b["micro"])
average_precision_b = average_precision_score(y_true, y_score)
print('Average precision-recall score: {0:0.2f}'.format(
average_precision_b))
precision_b, recall_b, _ = precision_recall_curve(y_true, y_score)
Average precision-recall score: 0.99
y_true = y_.astype(int)
y_true = label_binarize(y_, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(lr_decision, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_h = dict()
tpr_h = dict()
roc_auc_h = dict()
for i in range(n_classes):
fpr_h[i], tpr_h[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_h[i] = auc(fpr_h[i], tpr_h[i])
# Compute micro-average ROC curve and ROC area
fpr_h["micro"], tpr_h["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_h["micro"] = auc(fpr_h["micro"], tpr_h["micro"])
average_precision_h = average_precision_score(y_true, y_score)
print('Average precision-recall score: {0:0.2f}'.format(
average_precision_h))
precision_h, recall_h, _ = precision_recall_curve(y_true, y_score)
Average precision-recall score: 0.99
y_predicted = grclf_svm.predict(x_)
y_score = grclf_svm.decision_function(x_)
y_score.shape
(276,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_svcrbf = dict()
tpr_svcrbf = dict()
roc_auc_svcrbf = dict()
for i in range(n_classes):
fpr_svcrbf[i], tpr_svcrbf[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_svcrbf[i] = auc(fpr_svcrbf[i], tpr_svcrbf[i])
# Compute micro-average ROC curve and ROC area
fpr_svcrbf["micro"], tpr_svcrbf["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_svcrbf["micro"] = auc(fpr_svcrbf["micro"], tpr_svcrbf["micro"])
y_predicted = grclf_knn.predict(x_)
y_score = grclf_knn.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(276,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_knn = dict()
tpr_knn = dict()
roc_auc_knn = dict()
for i in range(n_classes):
fpr_knn[i], tpr_knn[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_knn[i] = auc(fpr_knn[i], tpr_knn[i])
# Compute micro-average ROC curve and ROC area
fpr_knn["micro"], tpr_knn["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_knn["micro"] = auc(fpr_knn["micro"], tpr_knn["micro"])
y_predicted = grclf_rf.predict(x_)
y_score = grclf_rf.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(276,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_rf = dict()
tpr_rf = dict()
roc_auc_rf = dict()
for i in range(n_classes):
fpr_rf[i], tpr_rf[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_rf[i] = auc(fpr_rf[i], tpr_rf[i])
# Compute micro-average ROC curve and ROC area
fpr_rf["micro"], tpr_rf["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_rf["micro"] = auc(fpr_rf["micro"], tpr_rf["micro"])
y_predicted = clf_gnb.predict(x_)
y_score = clf_gnb.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(276,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_nb = dict()
tpr_nb = dict()
roc_auc_nb = dict()
for i in range(n_classes):
fpr_nb[i], tpr_nb[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_nb[i] = auc(fpr_nb[i], tpr_nb[i])
# Compute micro-average ROC curve and ROC area
fpr_nb["micro"], tpr_nb["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_nb["micro"] = auc(fpr_nb["micro"], tpr_nb["micro"])
#plt.figure()
fig, ax = plt.subplots()
fig.set_size_inches(10,8)
lw = 4
plt.rc('xtick', labelsize=40)
plt.rc('ytick', labelsize=40)
plt.plot(fpr_svcrbf[0], tpr_svcrbf[0], color='green',
lw=lw, label='RBF SVM (AUC=%0.3f)' % roc_auc_svcrbf[0])
plt.plot(fpr_knn[0], tpr_knn[0], color='pink',
lw=lw, label='KNN (AUC=%0.3f)' % roc_auc_knn[0])
plt.plot(fpr_rf[0], tpr_rf[0], color='brown',
lw=lw, label='RF (AUC=%0.3f)' % roc_auc_rf[0])
plt.plot(fpr_nb[0], tpr_nb[0], color='orange',
lw=lw, label='GNB (AUC=%0.3f)' % roc_auc_nb[0])
plt.plot(fpr_b[0], tpr_b[0], color='blue',
lw=lw, label='Base (AUC=%0.3f)' % roc_auc_b[0])
plt.plot(fpr_h[0], tpr_h[0], color='red',
lw=lw, label='HPS (AUC=%0.3f)' % roc_auc_h[0])
plt.plot([0, 1], [0, 1], color='grey', lw=lw, linestyle='--')
plt.xlim([-0.05, 1.00])
plt.ylim([0.0, 1.05])
plt.xlabel('FPR', fontdict={'size': 40})
plt.ylabel('TPR', fontdict={'size': 40})
plt.title('ADNI2 AD vs CN', fontdict={'size': 40})
plt.legend(loc="lower right", prop={'size': 25})
#ax.legend(loc='center left', bbox_to_anchor=(1,0.5), prop={'size': 30})
plt.show()
fig.savefig(path_results + 'adni2_ad_roc_multi.pdf', bbox_inches='tight')
adni2_mci = pd.read_csv('/home/angela/Desktop/vcog_paper/adni1_vbm_adcn_subtypes_20171209/7clus/adni2_mci_bl_demog_weights.csv')
mask_mci = adni2_mci.loc[:,'MCI'].values.astype(bool)
adni2_mci = adni2_mci.iloc[mask_mci]
adni2_mci.dropna(subset=['sub1','age_scan','gender','mean_gm','tiv',
'ADAS13','ADNI_MEM','ADNI_EF','BNTTOTAL','CLOCKSCOR'],inplace=True)
adni2_mci = adni2_mci[['RID','age_scan','gender','mean_gm','tiv',
'ADAS13','ADNI_MEM','ADNI_EF','BNTTOTAL','CLOCKSCOR',
'sub1','sub2','sub3','sub4','sub5','sub6','sub7',
'ABETA','TAU','conv_2_ad','AD','MCI','CN','APOE4_bin','DX','SUMMARYSUVR_WHOLECEREBNORM_1.11CUTOFF',
'Month_conv']]
x_ = adni2_mci.iloc[:, adni2_mci.columns.get_loc("ADAS13"):adni2_mci.columns.get_loc("CLOCKSCOR")+1].values
#x_ = scalerX.transform(x_)
y_ = adni2_mci[['conv_2_ad']].values.ravel()
confounds = adni2_mci[['gender','age_scan','mean_gm','tiv']].values
#confounds = data[['sex','age_r']].values[mask_mci,:]
#confounds[:, 1:] = scaler.transform(confounds[:, 1:])
#confounds[:, 0] = preprocessing.binarize(confounds[:, 0].reshape(-1, 1), threshold=1)[:, 0]
#confounds = scaler.transform(confounds)
#x_ = crm.transform(confounds, x_)
x_ = scaler.transform(np.hstack((x_,confounds)))
x_.shape, y_.shape, confounds.shape
((235, 9), (235,), (235, 4))
array_results, dic_results = hpc.predict(x_, x_)
# Level 1
print('Level 1')
stats_mask(y_, (dic_results['s1df'][:,0]>0).astype(float))
print('Level 2')
stats_mask(y_, (dic_results['s1df'][:,0]>0).astype(float), dic_results['s2df'][:,1]>0)
#stats_mask(dic_results['s2df'][:,2]>0)
#stats_mask(dic_results['s2df'][:,3]>0)
Level 1 ------------------------ Ratio: 1.0 # : 55.0 # true values: 235 ACC : 0.8425531914893617 Level 2 ------------------------ Ratio: 0.5636363636363636 # : 31.0 # true values: 40 ACC : 0.775
y_pred = (dic_results['s1df'][:,0]>0).astype(float)
lr_decision = dic_results['s2df'][:,1]
predic_stats(y_, y_pred, lr_decision)
Total number of TARGET subjects: 55.0 Total number of NON-TARGET subjects: 180.0 Stage 1 number of hits (true and false positives): 60.0 Stage 1 TRUE positives: 39.0 Stage 1 FALSE positives: 21.0 Stage 1 TRUE negatives: 159.0 Total number of flagged HPC-AD subjects: 40.0 Number of flagged HPC-AD subjects that are TRUE positives: 31 Number of flagged HPC-AD subjects that are FALSE positives: 9.0 Number of true negatives: 171.0 ############################# Stage 1 stats for TARGET vs NON-TARGET Precision for AD: 0.65 Recall (or sensitivity) for AD: 0.7090909090909091 Specificity: 0.8833333333333333 Adjusted precision for 33.6% baseline rate: 0.7546358505778016 Accuracy: 0.8425531914893617 ############################# Stage 2 stats for TARGET vs NON-TARGET Precision for HPC-AD: 0.775 Recall (or sensitivity) for HPC-AD: 0.5636363636363636 Specificity: 0.95 Adjusted precision for 33.6% baseline rate: 0.8508413657899034 Accuracy: 0.8595744680851064
(0.7090909090909091, 0.8833333333333333, 0.65, 0.8425531914893617, 0.5636363636363636, 0.95, 0.775, 0.8595744680851064)
y_predicted = base.predict(x_)
y_score = base.decision_function(x_)
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_b = dict()
tpr_b = dict()
roc_auc_b = dict()
for i in range(n_classes):
fpr_b[i], tpr_b[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_b[i] = auc(fpr_b[i], tpr_b[i])
# Compute micro-average ROC curve and ROC area
fpr_b["micro"], tpr_b["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_b["micro"] = auc(fpr_b["micro"], tpr_b["micro"])
average_precision_b = average_precision_score(y_true, y_score)
print('Average precision-recall score: {0:0.2f}'.format(
average_precision_b))
precision_b, recall_b, _ = precision_recall_curve(y_true, y_score)
Average precision-recall score: 0.76
y_true = y_.astype(int)
y_true = label_binarize(y_, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(lr_decision, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_h = dict()
tpr_h = dict()
roc_auc_h = dict()
for i in range(n_classes):
fpr_h[i], tpr_h[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_h[i] = auc(fpr_h[i], tpr_h[i])
# Compute micro-average ROC curve and ROC area
fpr_h["micro"], tpr_h["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_h["micro"] = auc(fpr_h["micro"], tpr_h["micro"])
average_precision_h = average_precision_score(y_true, y_score)
print('Average precision-recall score: {0:0.2f}'.format(
average_precision_h))
precision_h, recall_h, _ = precision_recall_curve(y_true, y_score)
Average precision-recall score: 0.71
y_predicted = grclf_svm.predict(x_)
y_score = grclf_svm.decision_function(x_)
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_svcrbf = dict()
tpr_svcrbf = dict()
roc_auc_svcrbf = dict()
for i in range(n_classes):
fpr_svcrbf[i], tpr_svcrbf[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_svcrbf[i] = auc(fpr_svcrbf[i], tpr_svcrbf[i])
# Compute micro-average ROC curve and ROC area
fpr_svcrbf["micro"], tpr_svcrbf["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_svcrbf["micro"] = auc(fpr_svcrbf["micro"], tpr_svcrbf["micro"])
y_predicted = grclf_knn.predict(x_)
y_score = grclf_knn.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_knn = dict()
tpr_knn = dict()
roc_auc_knn = dict()
for i in range(n_classes):
fpr_knn[i], tpr_knn[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_knn[i] = auc(fpr_knn[i], tpr_knn[i])
# Compute micro-average ROC curve and ROC area
fpr_knn["micro"], tpr_knn["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_knn["micro"] = auc(fpr_knn["micro"], tpr_knn["micro"])
y_predicted = grclf_rf.predict(x_)
y_score = grclf_rf.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_rf = dict()
tpr_rf = dict()
roc_auc_rf = dict()
for i in range(n_classes):
fpr_rf[i], tpr_rf[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_rf[i] = auc(fpr_rf[i], tpr_rf[i])
# Compute micro-average ROC curve and ROC area
fpr_rf["micro"], tpr_rf["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_rf["micro"] = auc(fpr_rf["micro"], tpr_rf["micro"])
y_predicted = clf_gnb.predict(x_)
y_score = clf_gnb.predict_proba(x_)
y_score = y_score[:,1] # take positive class
y_score.shape
(235,)
y_true = y_.astype(int)
y_true = label_binarize(y_true, classes=[0, 1])
n_classes = y_true.shape[1]
y_score = np.reshape(y_score, (y_score.shape[0],1))
# Compute ROC curve and ROC area for each class
fpr_nb = dict()
tpr_nb = dict()
roc_auc_nb = dict()
for i in range(n_classes):
fpr_nb[i], tpr_nb[i], _ = roc_curve(y_true[:, i], y_score[:, i])
roc_auc_nb[i] = auc(fpr_nb[i], tpr_nb[i])
# Compute micro-average ROC curve and ROC area
fpr_nb["micro"], tpr_nb["micro"], _ = roc_curve(y_true.ravel(), y_score.ravel())
roc_auc_nb["micro"] = auc(fpr_nb["micro"], tpr_nb["micro"])
#plt.figure()
fig, ax = plt.subplots()
fig.set_size_inches(10,8)
lw = 4
plt.rc('xtick', labelsize=40)
plt.rc('ytick', labelsize=40)
plt.plot(fpr_svcrbf[0], tpr_svcrbf[0], color='green',
lw=lw, label='RBF SVM (AUC=%0.3f)' % roc_auc_svcrbf[0])
plt.plot(fpr_knn[0], tpr_knn[0], color='pink',
lw=lw, label='KNN (AUC=%0.3f)' % roc_auc_knn[0])
plt.plot(fpr_rf[0], tpr_rf[0], color='brown',
lw=lw, label='RF (AUC=%0.3f)' % roc_auc_rf[0])
plt.plot(fpr_nb[0], tpr_nb[0], color='orange',
lw=lw, label='GNB (AUC=%0.3f)' % roc_auc_nb[0])
plt.plot(fpr_b[0], tpr_b[0], color='blue',
lw=lw, label='Base (AUC=%0.3f)' % roc_auc_b[0])
plt.plot(fpr_h[0], tpr_h[0], color='red',
lw=lw, label='HPS (AUC=%0.3f)' % roc_auc_h[0])
plt.plot([0, 1], [0, 1], color='grey', lw=lw, linestyle='--')
plt.xlim([-0.05, 1.00])
plt.ylim([0.0, 1.05])
plt.xlabel('FPR', fontdict={'size': 40})
plt.ylabel('TPR', fontdict={'size': 40})
plt.title('ADNI2 pMCI vs sMCI', fontdict={'size': 40})
plt.legend(loc="lower right", prop={'size': 25})
#ax.legend(loc='center left', bbox_to_anchor=(1,0.5), prop={'size': 30})
plt.show()
fig.savefig(path_results + 'adni2_mci_roc_multi.pdf', bbox_inches='tight')