import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.model_selection import KFold, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from joblib import Parallel, delayed
from mlxtend.evaluate import permutation_test
sns.set(rc={"figure.figsize":(4, 2)})
sns.set_style("whitegrid")
We load functional network matrices (netmats) from the HCP1200-release, as published on connectomeDB: https://db.humanconnectome.org/
Due to licensing issues, data is not supplied with the repository, but can be downloaded from the ConnectomeDB or via get_data.ipynb
(requires credentials).
See readme.md for more details.
# HCP data can be obtained from the connectomeDB
# data is not part of this repository
subjectIDs = pd.read_csv('hcp_data/subjectIDs.txt', header=None)
netmats_pearson = pd.read_csv('hcp_data/netmats1_correlationZ.txt',
sep=' ',
header=None)
netmats_pearson['ID'] = subjectIDs[0]
netmats_pearson.set_index('ID', drop=True, inplace=True)
netmats_parcor = pd.read_csv('hcp_data/netmats2_partial-correlation.txt',
sep=' ',
header=None)
netmats_parcor['ID'] = subjectIDs[0]
netmats_parcor.set_index('ID', drop=True, inplace=True)
behavior = pd.read_csv('hcp_data/hcp1200_behavioral_data.csv')
behavior = behavior.set_index('Subject', drop=True)
# convert age to numeric
age = []
for s in behavior['Age']:
if s == '36+':
age.append(36)
else:
split = s.split(sep='-')
age.append(np.mean((float(split[0]), float(split[1]))))
behavior['age'] = age
behavior.describe()
T1_Count | T2_Count | 3T_RS-fMRI_Count | 3T_RS-fMRI_PctCompl | 3T_tMRI_PctCompl | fMRI_WM_PctCompl | fMRI_Gamb_PctCompl | fMRI_Mot_PctCompl | fMRI_Lang_PctCompl | fMRI_Soc_PctCompl | ... | Odor_Unadj | Odor_AgeAdj | PainIntens_RawScore | PainInterf_Tscore | Taste_Unadj | Taste_AgeAdj | Mars_Log_Score | Mars_Errs | Mars_Final | age | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 1206.000000 | 1206.000000 | 1206.000000 | 1206.000000 | 1206.000000 | 1206.000000 | 1206.000000 | 1206.000000 | 1206.000000 | 1206.000000 | ... | 1204.000000 | 1204.000000 | 1201.000000 | 1205.000000 | 1200.000000 | 1200.000000 | 1198.000000 | 1195.000000 | 1195.000000 | 1206.000000 |
mean | 1.478441 | 1.400498 | 3.509950 | 87.213267 | 88.219569 | 89.718076 | 89.852736 | 89.631675 | 87.027114 | 87.039801 | ... | 110.421321 | 97.727500 | 1.449625 | 45.847718 | 95.166983 | 93.998533 | 1.845467 | 0.581590 | 1.822251 | 28.904229 |
std | 0.635688 | 0.628216 | 1.215181 | 31.027886 | 29.942161 | 30.384864 | 30.163559 | 30.492092 | 33.566043 | 33.570248 | ... | 9.107963 | 11.273251 | 1.783069 | 7.679288 | 14.583412 | 14.837851 | 0.541393 | 0.973172 | 0.542893 | 3.570475 |
min | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 82.740000 | 59.860000 | 0.000000 | 38.600000 | 56.350000 | 59.500000 | 1.560000 | 0.000000 | 1.080000 | 23.500000 |
25% | 1.000000 | 1.000000 | 4.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | 101.120000 | 87.110000 | 0.000000 | 38.600000 | 84.070000 | 83.220000 | 1.800000 | 0.000000 | 1.760000 | 28.000000 |
50% | 2.000000 | 1.000000 | 4.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | 108.790000 | 98.040000 | 1.000000 | 45.900000 | 95.360000 | 94.970000 | 1.800000 | 0.000000 | 1.800000 | 28.000000 |
75% | 2.000000 | 2.000000 | 4.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | 122.250000 | 110.450000 | 2.000000 | 52.200000 | 105.570000 | 102.920000 | 1.880000 | 1.000000 | 1.840000 | 33.000000 |
max | 2.000000 | 2.000000 | 4.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | ... | 122.250000 | 111.410000 | 10.000000 | 75.300000 | 134.650000 | 131.380000 | 15.000000 | 17.000000 | 15.000000 | 36.000000 |
8 rows × 456 columns
def create_data(target='CogTotalComp_AgeAdj', feature_data=netmats_parcor):
# it's a good practice to use pandas for merging, messing up subject order can be painful
features = feature_data.columns
df = behavior
df = df.merge(feature_data, left_index=True, right_index=True, how='left')
df = df.dropna(subset = [target] + features.values.tolist())
y = df[target].values
X = df[features].values
return X, y
We define a workhorse function which:
def corr(X, Y):
# just a small wrapper function (pandas correlation is silent in "unlucky" bootstraps with constant values)
return pd.Series(X).corr( pd.Series(Y))
def bootstrap_workhorse(X, y, sample_size, model, random_state, shuffle_y=False):
#create discovery and replication samples by random sampling from the whole dataset (without replacement)
# if shuffle_y is true, a null model is created bz permuting y
if shuffle_y:
rng = np.random.default_rng(random_state)
y = rng.permutation(y)
# sample the discovery and replication sets *without replacement* (with replacement introduces spurious dependencies)
X_discovery, X_replication, y_discovery, y_replication = train_test_split(X, y, train_size=sample_size, test_size=sample_size, shuffle=True, random_state=random_state)
# standard 10-fold cross-validation
cv = KFold(10)
# below we obtain cross-validated predictions in the discovery sample
predicted_discovery_cv = np.zeros_like(y_discovery) # here we collect the predictions for each fold
cor_per_fold = np.zeros(cv.n_splits) # here we collect the predictive performance in each fold
i = 0 # just a counter
for train, test in cv.split(X=X_discovery, y=y_discovery): # loop to leave one fold out
model.fit(X=X_discovery[train], y=y_discovery[train]) # fit model to the training set
predicted_discovery_cv[test] = model.predict(X=X_discovery[test]) # use fitted model to predict teh test set
cor_per_fold[i] = corr(y_discovery[test], predicted_discovery_cv[test]) # calculate performance on tne test set
i += 1
# calculate mean test performance across all folds
r_disc_cv = np.mean(cor_per_fold)
# 'finalize' model by training it on the full discovery sample (without cross-validation)
final_model = model.fit(X=X_discovery, y=y_discovery)
# obtain predictions with the final model on the discovery sample, note that this model actually overfits this sample.
# we do this only to demonstrate biased estimates
predicted_discovery_overfit = final_model.predict(X=X_discovery)
# here we obtain the biased effect size (r) estimates for demonstrational purposes
r_disc_overfit = corr(predicted_discovery_overfit, y_discovery)
# We use the final model to predict the replication sample
# This is correct (no overfitting here), the final model did not see this data during training
predicted_replication = final_model.predict(X=X_replication)
# we obtain the out-of-sample prediction performance estimates
r_rep = corr(predicted_replication, y_replication)
# below we calculate permutation-based p-values for all three effect size estimates (in-sample unbiased, in-sample biased, out-of-sample)
# (one sided tests, testing for positive correlation)
p_disc_cv = permutation_test(predicted_discovery_cv, y_discovery, method='approximate', num_rounds=1000, func=lambda x, y: corr(x, y),seed=random_state)
p_disc_overfit = permutation_test(predicted_discovery_overfit, y_discovery, method='approximate', num_rounds=1000, func=lambda x, y: corr(x, y),seed=random_state)
p_rep = permutation_test(predicted_replication, y_replication, method='approximate', num_rounds=1000, func=lambda x, y: corr(x, y),seed=random_state)
# return results
return r_disc_cv, r_disc_overfit, r_rep, p_disc_cv, p_disc_overfit, p_rep
All set, now we start the analysis.
Here we train a few different models on 100 bootstrap samples.
We aggregate the results of our workhorse function in n_bootstrap
=100 bootstrap cases (run in parallel).
The whole process is repeated for all sample sizes, fetaure_sets and target variables.
targets = [
#demographics
'age',
# cognition
'MMSE_Score',
'PSQI_Score',
'PicSeq_AgeAdj',
'CardSort_AgeAdj',
'Flanker_AgeAdj',
'PMAT24_A_CR',
'CogFluidComp_AgeAdj',
'CogCrystalComp_AgeAdj',
'CogEarlyComp_AgeAdj',
'CogTotalComp_AgeAdj',
'ReadEng_AgeAdj',
'PicVocab_AgeAdj',
'ProcSpeed_AgeAdj',
'DDisc_AUC_200',
'VSPLOT_TC',
'SCPT_SEN',
'SCPT_SPEC',
# memory
'IWRD_TOT',
'ListSort_AgeAdj',
# emotion
'ER40ANG',
'ER40FEAR',
'ER40HAP',
'ER40NOE',
'ER40SAD',
'AngAffect_Unadj',
'AngHostil_Unadj',
'AngAggr_Unadj',
'FearAffect_Unadj',
'FearSomat_Unadj',
'FearSomat_Unadj',
'Sadness_Unadj',
'LifeSatisf_Unadj',
'MeanPurp_Unadj',
'PosAffect_Unadj',
'Friendship_Unadj',
'Loneliness_Unadj',
'PercHostil_Unadj',
'PercReject_Unadj',
'EmotSupp_Unadj',
'InstruSupp_Unadj',
'PercStress_Unadj',
'SelfEff_Unadj',
'Dexterity_AgeAdj',
# personality
'NEOFAC_A',
'NEOFAC_O',
'NEOFAC_C',
'NEOFAC_N',
'NEOFAC_E',
# sensory
'Noise_Comp',
'Odor_AgeAdj',
'PainInterf_Tscore',
'Taste_AgeAdj',
'Mars_Final'
]
(no feature selection, no hyperparameter optimization)
%%time
random_state = 42
n_bootstrap = 100
features = {
'netmats_parcor': netmats_parcor#,
#'netmats_pearson': netmats_pearson
}
models = {
'ridge': Ridge()
}
# We aggregate all results here:
df = pd.DataFrame(columns=['connectivity','model','target','n','r_discovery_cv','r_discovery_overfit','r_replication','p_discovery_cv','p_discovery_overfit','p_replication'])
for feature_set in features:
for model in models:
for target_var in targets:
for sample_size in ['max']:
print('*****************************************************************')
print(feature_set, model, target_var, sample_size)
X, y = create_data(target=target_var, feature_data=features[feature_set])
if sample_size=='max':
sample_size = int(len(y)/2)
# create random seeds for each bootstrap iteration for reproducibility
rng = np.random.default_rng(random_state)
random_sates = rng.integers(np.iinfo(np.int32).max, size=n_bootstrap)
# run bootstrap iterations in parallel
r_discovery_cv, r_discovery_overfit, r_replication, p_discovery_cv, p_discovery_overfit, p_replication = zip(
*Parallel(n_jobs=-1)(
delayed(bootstrap_workhorse)(X, y, sample_size, models[model], seed) for seed in random_sates))
tmp_data_frame = pd.DataFrame({
'connectivity' : feature_set,
'model' : model,
'target' : target_var,
'n' : sample_size,
'r_discovery_cv': r_discovery_cv,
'r_discovery_overfit': r_discovery_overfit,
'r_replication': r_replication,
'p_discovery_cv': p_discovery_cv,
'p_discovery_overfit': p_discovery_overfit,
'p_replication': p_replication
})
#sns.scatterplot(x='r_replication', y='r_discovery_cv', data=tmp_data_frame)
#plt.ylabel('in-sample (r)')
#plt.xlabel('out-of-sample (r_pred)')
#plt.show()
print('r discovery (with cv) :', tmp_data_frame.r_discovery_cv.mean(), 'r replication:', tmp_data_frame.r_replication.mean())
for alpha in [0.05, 0.01, 0.005, 0.001]:
print('Replicability at alpha =', alpha, ':',
(tmp_data_frame.loc[tmp_data_frame['p_discovery_cv']<alpha,'p_replication']<alpha).sum() / (tmp_data_frame['p_discovery_cv']<0.05).sum() * 100, '%')
df = pd.concat((df, tmp_data_frame))
df.reset_index(drop=True, inplace=True)
df.to_csv('res/revised_results_Ridge.csv')
df
***************************************************************** netmats_parcor ridge age max r discovery (with cv) : 0.48020846359551633 r replication: 0.48967065593640496 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge MMSE_Score max r discovery (with cv) : 0.11235242553787785 r replication: 0.12970357972616348 Replicability at alpha = 0.05 : 89.87341772151899 % Replicability at alpha = 0.01 : 40.50632911392405 % Replicability at alpha = 0.005 : 25.31645569620253 % Replicability at alpha = 0.001 : 2.5316455696202533 % ***************************************************************** netmats_parcor ridge PSQI_Score max r discovery (with cv) : 0.14291306988028807 r replication: 0.14519995923911966 Replicability at alpha = 0.05 : 98.85057471264368 % Replicability at alpha = 0.01 : 72.41379310344827 % Replicability at alpha = 0.005 : 47.12643678160919 % Replicability at alpha = 0.001 : 16.091954022988507 % ***************************************************************** netmats_parcor ridge PicSeq_AgeAdj max r discovery (with cv) : 0.18233069324505247 r replication: 0.18593093519593903 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 93.87755102040816 % Replicability at alpha = 0.005 : 90.81632653061224 % Replicability at alpha = 0.001 : 66.3265306122449 % ***************************************************************** netmats_parcor ridge CardSort_AgeAdj max r discovery (with cv) : 0.1789658621130238 r replication: 0.18957693758366514 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 96.90721649484536 % Replicability at alpha = 0.005 : 91.75257731958763 % Replicability at alpha = 0.001 : 65.97938144329896 % ***************************************************************** netmats_parcor ridge Flanker_AgeAdj max r discovery (with cv) : 0.1322612349491906 r replication: 0.14022631939127772 Replicability at alpha = 0.05 : 98.87640449438202 % Replicability at alpha = 0.01 : 61.79775280898876 % Replicability at alpha = 0.005 : 42.69662921348314 % Replicability at alpha = 0.001 : 12.359550561797752 % ***************************************************************** netmats_parcor ridge PMAT24_A_CR max r discovery (with cv) : 0.2922426473940083 r replication: 0.301492004778787 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge CogFluidComp_AgeAdj max r discovery (with cv) : 0.3049801718197025 r replication: 0.2996751646332102 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge CogCrystalComp_AgeAdj max r discovery (with cv) : 0.509356447724508 r replication: 0.5252483106948896 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge CogEarlyComp_AgeAdj max r discovery (with cv) : 0.35010656810589025 r replication: 0.346212766761039 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge CogTotalComp_AgeAdj max r discovery (with cv) : 0.4726518883300038 r replication: 0.47903946590450225 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge ReadEng_AgeAdj max r discovery (with cv) : 0.450867003576911 r replication: 0.46022188861700536 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge PicVocab_AgeAdj max r discovery (with cv) : 0.4748017798587243 r replication: 0.4784423901738578 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge ProcSpeed_AgeAdj max r discovery (with cv) : 0.11764068520351804 r replication: 0.12249903623335612 Replicability at alpha = 0.05 : 84.61538461538461 % Replicability at alpha = 0.01 : 38.46153846153847 % Replicability at alpha = 0.005 : 21.794871794871796 % Replicability at alpha = 0.001 : 2.564102564102564 % ***************************************************************** netmats_parcor ridge DDisc_AUC_200 max r discovery (with cv) : 0.20884491911824618 r replication: 0.20706209788397087 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 99.0 % Replicability at alpha = 0.005 : 95.0 % Replicability at alpha = 0.001 : 83.0 % ***************************************************************** netmats_parcor ridge VSPLOT_TC max r discovery (with cv) : 0.33041854229644096 r replication: 0.3361466183063388 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge SCPT_SEN max r discovery (with cv) : 0.004953219542766921 r replication: 0.005954925720074722 Replicability at alpha = 0.05 : 0.0 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge SCPT_SPEC max r discovery (with cv) : 0.1483580336589026 r replication: 0.15543127763406267 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 78.02197802197803 % Replicability at alpha = 0.005 : 57.14285714285714 % Replicability at alpha = 0.001 : 26.373626373626376 % ***************************************************************** netmats_parcor ridge IWRD_TOT max r discovery (with cv) : 0.0810183743864671 r replication: 0.0739403812147235 Replicability at alpha = 0.05 : 36.53846153846153 % Replicability at alpha = 0.01 : 3.8461538461538463 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge ListSort_AgeAdj max r discovery (with cv) : 0.27262179944410553 r replication: 0.27629338784789925 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 100.0 % ***************************************************************** netmats_parcor ridge ER40ANG max r discovery (with cv) : 0.07290919946421724 r replication: 0.07930980325318832 Replicability at alpha = 0.05 : 44.680851063829785 % Replicability at alpha = 0.01 : 2.127659574468085 % Replicability at alpha = 0.005 : 2.127659574468085 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge ER40FEAR max r discovery (with cv) : 0.033788344197422604 r replication: 0.03217012035635588 Replicability at alpha = 0.05 : 0.0 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge ER40HAP max r discovery (with cv) : -0.05910616789376827 r replication: -0.040972411502727554 Replicability at alpha = 0.05 : nan % Replicability at alpha = 0.01 : nan % Replicability at alpha = 0.005 : nan % Replicability at alpha = 0.001 : nan % ***************************************************************** netmats_parcor ridge ER40NOE max
<timed exec>:58: RuntimeWarning: invalid value encountered in long_scalars
r discovery (with cv) : 0.05015028148085295 r replication: 0.05477097859212915 Replicability at alpha = 0.05 : 16.666666666666664 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge ER40SAD max r discovery (with cv) : 0.03355879047628217 r replication: 0.043958511763540556 Replicability at alpha = 0.05 : 10.0 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge AngAffect_Unadj max r discovery (with cv) : 0.09675041204325666 r replication: 0.097384559509148 Replicability at alpha = 0.05 : 69.23076923076923 % Replicability at alpha = 0.01 : 12.307692307692308 % Replicability at alpha = 0.005 : 1.5384615384615385 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge AngHostil_Unadj max r discovery (with cv) : 0.06336555348912118 r replication: 0.07505183534895289 Replicability at alpha = 0.05 : 41.02564102564102 % Replicability at alpha = 0.01 : 2.564102564102564 % Replicability at alpha = 0.005 : 2.564102564102564 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge AngAggr_Unadj max r discovery (with cv) : 0.19405555852511736 r replication: 0.20052370789491036 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 95.0 % Replicability at alpha = 0.005 : 93.0 % Replicability at alpha = 0.001 : 77.0 % ***************************************************************** netmats_parcor ridge FearAffect_Unadj max r discovery (with cv) : 0.15563217224852452 r replication: 0.15122577808620358 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 77.01149425287356 % Replicability at alpha = 0.005 : 66.66666666666666 % Replicability at alpha = 0.001 : 29.88505747126437 % ***************************************************************** netmats_parcor ridge FearSomat_Unadj max r discovery (with cv) : 0.061101136887113376 r replication: 0.054246596855310567 Replicability at alpha = 0.05 : 14.285714285714285 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge FearSomat_Unadj max r discovery (with cv) : 0.061101136887113376 r replication: 0.054246596855310567 Replicability at alpha = 0.05 : 14.285714285714285 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge Sadness_Unadj max r discovery (with cv) : 0.1327824232969155 r replication: 0.1244556654760563 Replicability at alpha = 0.05 : 94.04761904761905 % Replicability at alpha = 0.01 : 54.761904761904766 % Replicability at alpha = 0.005 : 35.714285714285715 % Replicability at alpha = 0.001 : 7.142857142857142 % ***************************************************************** netmats_parcor ridge LifeSatisf_Unadj max r discovery (with cv) : 0.19873341230892952 r replication: 0.18440610179827896 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 95.87628865979381 % Replicability at alpha = 0.005 : 92.78350515463917 % Replicability at alpha = 0.001 : 67.0103092783505 % ***************************************************************** netmats_parcor ridge MeanPurp_Unadj max r discovery (with cv) : 0.09984748458857128 r replication: 0.08905980179359992 Replicability at alpha = 0.05 : 50.76923076923077 % Replicability at alpha = 0.01 : 15.384615384615385 % Replicability at alpha = 0.005 : 4.615384615384616 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge PosAffect_Unadj max r discovery (with cv) : 0.09883821052746254 r replication: 0.08771575790081709 Replicability at alpha = 0.05 : 55.223880597014926 % Replicability at alpha = 0.01 : 10.44776119402985 % Replicability at alpha = 0.005 : 5.970149253731343 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge Friendship_Unadj max r discovery (with cv) : 0.08531254415798344 r replication: 0.08428001338641648 Replicability at alpha = 0.05 : 44.0 % Replicability at alpha = 0.01 : 10.0 % Replicability at alpha = 0.005 : 4.0 % Replicability at alpha = 0.001 : 2.0 % ***************************************************************** netmats_parcor ridge Loneliness_Unadj max r discovery (with cv) : 0.10181181257724871 r replication: 0.10338556615021999 Replicability at alpha = 0.05 : 71.66666666666667 % Replicability at alpha = 0.01 : 26.666666666666668 % Replicability at alpha = 0.005 : 6.666666666666667 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge PercHostil_Unadj max r discovery (with cv) : 0.037622230965088856 r replication: 0.04500642736764501 Replicability at alpha = 0.05 : 0.0 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge PercReject_Unadj max r discovery (with cv) : 0.03688725729680184 r replication: 0.03543635186966549 Replicability at alpha = 0.05 : 4.3478260869565215 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge EmotSupp_Unadj max r discovery (with cv) : 0.1260287186553436 r replication: 0.12226413039802284 Replicability at alpha = 0.05 : 96.55172413793103 % Replicability at alpha = 0.01 : 43.67816091954023 % Replicability at alpha = 0.005 : 28.735632183908045 % Replicability at alpha = 0.001 : 9.195402298850574 % ***************************************************************** netmats_parcor ridge InstruSupp_Unadj max r discovery (with cv) : 0.04191974313059748 r replication: 0.03262089935367679 Replicability at alpha = 0.05 : 4.166666666666666 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge PercStress_Unadj max r discovery (with cv) : 0.10021102140816969 r replication: 0.09269067112373008 Replicability at alpha = 0.05 : 62.5 % Replicability at alpha = 0.01 : 14.0625 % Replicability at alpha = 0.005 : 6.25 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge SelfEff_Unadj max r discovery (with cv) : 0.09990355623342788 r replication: 0.10774220880935355 Replicability at alpha = 0.05 : 81.81818181818183 % Replicability at alpha = 0.01 : 28.78787878787879 % Replicability at alpha = 0.005 : 12.121212121212121 % Replicability at alpha = 0.001 : 1.5151515151515151 % ***************************************************************** netmats_parcor ridge Dexterity_AgeAdj max r discovery (with cv) : 0.1698377324671013 r replication: 0.16739913235673143 Replicability at alpha = 0.05 : 98.9247311827957 % Replicability at alpha = 0.01 : 88.17204301075269 % Replicability at alpha = 0.005 : 81.72043010752688 % Replicability at alpha = 0.001 : 51.61290322580645 % ***************************************************************** netmats_parcor ridge NEOFAC_A max r discovery (with cv) : 0.19226360180293334 r replication: 0.2105816660368088 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 97.95918367346938 % Replicability at alpha = 0.005 : 94.89795918367348 % Replicability at alpha = 0.001 : 82.6530612244898 % ***************************************************************** netmats_parcor ridge NEOFAC_O max r discovery (with cv) : 0.26505077367461527 r replication: 0.28578623242498713 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 100.0 % Replicability at alpha = 0.005 : 100.0 % Replicability at alpha = 0.001 : 98.0 % ***************************************************************** netmats_parcor ridge NEOFAC_C max r discovery (with cv) : 0.17973714484091477 r replication: 0.19354830382121097 Replicability at alpha = 0.05 : 100.0 % Replicability at alpha = 0.01 : 92.78350515463917 % Replicability at alpha = 0.005 : 90.72164948453609 % Replicability at alpha = 0.001 : 64.94845360824742 % ***************************************************************** netmats_parcor ridge NEOFAC_N max r discovery (with cv) : 0.07314281912604746 r replication: 0.06696149072259926 Replicability at alpha = 0.05 : 40.0 % Replicability at alpha = 0.01 : 2.0 % Replicability at alpha = 0.005 : 2.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge NEOFAC_E max r discovery (with cv) : 0.15791961819822467 r replication: 0.16447031944134946 Replicability at alpha = 0.05 : 98.93617021276596 % Replicability at alpha = 0.01 : 84.04255319148936 % Replicability at alpha = 0.005 : 70.2127659574468 % Replicability at alpha = 0.001 : 35.1063829787234 % ***************************************************************** netmats_parcor ridge Noise_Comp max r discovery (with cv) : 0.13754389395248953 r replication: 0.13277944361089927 Replicability at alpha = 0.05 : 97.67441860465115 % Replicability at alpha = 0.01 : 62.7906976744186 % Replicability at alpha = 0.005 : 46.51162790697674 % Replicability at alpha = 0.001 : 10.465116279069768 % ***************************************************************** netmats_parcor ridge Odor_AgeAdj max r discovery (with cv) : 0.03422255970866912 r replication: 0.019033276660184433 Replicability at alpha = 0.05 : 0.0 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge PainInterf_Tscore max r discovery (with cv) : 0.03274152032524882 r replication: 0.03329128578051491 Replicability at alpha = 0.05 : 0.0 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge Taste_AgeAdj max r discovery (with cv) : 0.0944194746807131 r replication: 0.08063900075841376 Replicability at alpha = 0.05 : 43.103448275862064 % Replicability at alpha = 0.01 : 5.172413793103448 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % ***************************************************************** netmats_parcor ridge Mars_Final max r discovery (with cv) : 0.0815295729161019 r replication: 0.02266341772170401 Replicability at alpha = 0.05 : 6.666666666666667 % Replicability at alpha = 0.01 : 0.0 % Replicability at alpha = 0.005 : 0.0 % Replicability at alpha = 0.001 : 0.0 % CPU times: user 38 s, sys: 5.9 s, total: 43.9 s Wall time: 44min 5s
connectivity | model | target | n | r_discovery_cv | r_discovery_overfit | r_replication | p_discovery_cv | p_discovery_overfit | p_replication | |
---|---|---|---|---|---|---|---|---|---|---|
0 | netmats_parcor | ridge | age | 501 | 0.480679 | 1.0 | 0.514760 | 0.000999 | 0.000999 | 0.000999 |
1 | netmats_parcor | ridge | age | 501 | 0.477272 | 1.0 | 0.455021 | 0.000999 | 0.000999 | 0.000999 |
2 | netmats_parcor | ridge | age | 501 | 0.528641 | 1.0 | 0.482612 | 0.000999 | 0.000999 | 0.000999 |
3 | netmats_parcor | ridge | age | 501 | 0.488856 | 1.0 | 0.502899 | 0.000999 | 0.000999 | 0.000999 |
4 | netmats_parcor | ridge | age | 501 | 0.429166 | 1.0 | 0.530274 | 0.000999 | 0.000999 | 0.000999 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5395 | netmats_parcor | ridge | Mars_Final | 499 | 0.090619 | 1.0 | 0.031658 | 0.069930 | 0.000999 | 0.115884 |
5396 | netmats_parcor | ridge | Mars_Final | 499 | 0.005679 | 1.0 | 0.038146 | 0.565435 | 0.000999 | 0.147852 |
5397 | netmats_parcor | ridge | Mars_Final | 499 | 0.017233 | 1.0 | 0.057484 | 0.449550 | 0.000999 | 0.106893 |
5398 | netmats_parcor | ridge | Mars_Final | 499 | 0.009701 | 1.0 | 0.005276 | 0.526474 | 0.000999 | 0.400599 |
5399 | netmats_parcor | ridge | Mars_Final | 499 | -0.032289 | 1.0 | 0.017725 | 0.723277 | 0.000999 | 0.254745 |
5400 rows × 10 columns
df.groupby(['connectivity', 'model', 'target', 'n']).mean()[['r_discovery_cv', 'r_replication']]
r_discovery_cv | r_replication | ||||
---|---|---|---|---|---|
connectivity | model | target | n | ||
netmats_parcor | ridge | AngAffect_Unadj | 501 | 0.096750 | 0.097385 |
AngAggr_Unadj | 501 | 0.194056 | 0.200524 | ||
AngHostil_Unadj | 501 | 0.063366 | 0.075052 | ||
CardSort_AgeAdj | 500 | 0.178966 | 0.189577 | ||
CogCrystalComp_AgeAdj | 499 | 0.509356 | 0.525248 | ||
CogEarlyComp_AgeAdj | 497 | 0.350107 | 0.346213 | ||
CogFluidComp_AgeAdj | 496 | 0.304980 | 0.299675 | ||
CogTotalComp_AgeAdj | 495 | 0.472652 | 0.479039 | ||
DDisc_AUC_200 | 500 | 0.208845 | 0.207062 | ||
Dexterity_AgeAdj | 501 | 0.169838 | 0.167399 | ||
ER40ANG | 500 | 0.072909 | 0.079310 | ||
ER40FEAR | 500 | 0.033788 | 0.032170 | ||
ER40HAP | 500 | -0.059106 | -0.040972 | ||
ER40NOE | 500 | 0.050150 | 0.054771 | ||
ER40SAD | 500 | 0.033559 | 0.043959 | ||
EmotSupp_Unadj | 501 | 0.126029 | 0.122264 | ||
FearAffect_Unadj | 501 | 0.155632 | 0.151226 | ||
FearSomat_Unadj | 501 | 0.061101 | 0.054247 | ||
Flanker_AgeAdj | 501 | 0.132261 | 0.140226 | ||
Friendship_Unadj | 501 | 0.085313 | 0.084280 | ||
IWRD_TOT | 500 | 0.081018 | 0.073940 | ||
InstruSupp_Unadj | 501 | 0.041920 | 0.032621 | ||
LifeSatisf_Unadj | 501 | 0.198733 | 0.184406 | ||
ListSort_AgeAdj | 501 | 0.272622 | 0.276293 | ||
Loneliness_Unadj | 501 | 0.101812 | 0.103386 | ||
MMSE_Score | 501 | 0.112352 | 0.129704 | ||
Mars_Final | 499 | 0.081530 | 0.022663 | ||
MeanPurp_Unadj | 501 | 0.099847 | 0.089060 | ||
NEOFAC_A | 500 | 0.192264 | 0.210582 | ||
NEOFAC_C | 500 | 0.179737 | 0.193548 | ||
NEOFAC_E | 500 | 0.157920 | 0.164470 | ||
NEOFAC_N | 500 | 0.073143 | 0.066961 | ||
NEOFAC_O | 500 | 0.265051 | 0.285786 | ||
Noise_Comp | 498 | 0.137544 | 0.132779 | ||
Odor_AgeAdj | 500 | 0.034223 | 0.019033 | ||
PMAT24_A_CR | 499 | 0.292243 | 0.301492 | ||
PSQI_Score | 501 | 0.142913 | 0.145200 | ||
PainInterf_Tscore | 501 | 0.032742 | 0.033291 | ||
PercHostil_Unadj | 501 | 0.037622 | 0.045006 | ||
PercReject_Unadj | 501 | 0.036887 | 0.035436 | ||
PercStress_Unadj | 501 | 0.100211 | 0.092691 | ||
PicSeq_AgeAdj | 501 | 0.182331 | 0.185931 | ||
PicVocab_AgeAdj | 501 | 0.474802 | 0.478442 | ||
PosAffect_Unadj | 501 | 0.098838 | 0.087716 | ||
ProcSpeed_AgeAdj | 501 | 0.117641 | 0.122499 | ||
ReadEng_AgeAdj | 501 | 0.450867 | 0.460222 | ||
SCPT_SEN | 500 | 0.004953 | 0.005955 | ||
SCPT_SPEC | 500 | 0.148358 | 0.155431 | ||
Sadness_Unadj | 501 | 0.132782 | 0.124456 | ||
SelfEff_Unadj | 501 | 0.099904 | 0.107742 | ||
Taste_AgeAdj | 499 | 0.094419 | 0.080639 | ||
VSPLOT_TC | 500 | 0.330419 | 0.336147 | ||
age | 501 | 0.480208 | 0.489671 |
df=pd.read_csv('res/revised_results_Ridge.csv')
sns.set(rc={"figure.figsize":(14, 2)})
sns.set_style("white")
phenotypes = pd.read_csv('hcp_data/phenotypes-of-interest.csv').iloc[:, 1:]
phenotypes = phenotypes.drop([2,3]) # Age duplicates
def prep(x, alpha=0.05):
return (x.loc[x['p_discovery_cv']<alpha,'p_replication']<alpha).sum() / (x['p_discovery_cv']<0.05).sum() * 100
df_plot = pd.DataFrame(df[df.connectivity=='netmats_parcor'].groupby('target').apply(prep))
df_plot['r_discovery'] = df[df.connectivity=='netmats_parcor'].groupby('target').mean()['r_discovery_cv']
df_plot = df_plot[df_plot.index!='age']
df_plot['category'] = ''
df_plot['phenotype'] = df_plot.index
for i in df_plot.index:
df_plot.loc[i, 'category'] = phenotypes.loc[phenotypes.columnHeader==i, 'category'].values[0]
df_plot = df_plot.sort_values(by='r_discovery', ascending=False)
pal = "muted"
my_colormap = [
sns.color_palette(pal)[4],
sns.color_palette(pal)[0], # cognition
sns.color_palette(pal)[3], # emotion
sns.color_palette(pal)[2], # dexterity
sns.color_palette(pal)[1], # big5
sns.color_palette(pal)[5], # sensory
sns.color_palette(pal)[6], # ?
sns.color_palette(pal)[3]]
colors = []
for code in pd.Categorical(df_plot.category).codes:
colors.append(my_colormap[code])
ax=sns.barplot(data=df_plot, x='phenotype', y=0, palette=colors)
plt.xticks(rotation=90)
ax.axhline(80, color='black', linestyle=':')
plt.savefig('fig/replicability_all_phenotypes.pdf')
/tmp/ipykernel_19024/3478190779.py:9: RuntimeWarning: invalid value encountered in long_scalars return (x.loc[x['p_discovery_cv']<alpha,'p_replication']<alpha).sum() / (x['p_discovery_cv']<0.05).sum() * 100 /tmp/ipykernel_19024/3478190779.py:13: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function. df_plot['r_discovery'] = df[df.connectivity=='netmats_parcor'].groupby('target').mean()['r_discovery_cv']
ax=sns.barplot(data=df_plot, x='phenotype', y='r_discovery', palette=colors)
plt.xticks(rotation=90)
ax.axhline(0.074, color='black', linestyle=':') # parametric one-sided significance threshold
ax.axhline(0, color='black')
plt.savefig('fig/r_discovery_all_phenotypes.pdf')
import plotly.express as px
fig = px.bar(df_plot, y=0, x='phenotype', text_auto='.2s',
title="Replication probability with n=500",
labels={'0':'$P_{rep}$'},
hover_data=['category', 'phenotype_long', 'r_discovery'], color='category')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)
fig.show()
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[5], line 3 1 import plotly.express as px ----> 3 fig = px.bar(df_plot, y=0, x='phenotype', text_auto='.2s', 4 title="Replication probability with n=500", 5 labels={'0':'$P_{rep}$'}, 6 hover_data=['category', 'phenotype_long', 'r_discovery'], color='category') 7 fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False) 8 fig.show() File ~/src/BWAS_comment/venv/lib/python3.10/site-packages/plotly/express/_chart_types.py:373, in bar(data_frame, x, y, color, pattern_shape, facet_row, facet_col, facet_col_wrap, facet_row_spacing, facet_col_spacing, hover_name, hover_data, custom_data, text, base, error_x, error_x_minus, error_y, error_y_minus, animation_frame, animation_group, category_orders, labels, color_discrete_sequence, color_discrete_map, color_continuous_scale, pattern_shape_sequence, pattern_shape_map, range_color, color_continuous_midpoint, opacity, orientation, barmode, log_x, log_y, range_x, range_y, text_auto, title, template, width, height) 325 def bar( 326 data_frame=None, 327 x=None, (...) 367 height=None, 368 ) -> go.Figure: 369 """ 370 In a bar plot, each row of `data_frame` is represented as a rectangular 371 mark. 372 """ --> 373 return make_figure( 374 args=locals(), 375 constructor=go.Bar, 376 trace_patch=dict(textposition="auto"), 377 layout_patch=dict(barmode=barmode), 378 ) File ~/src/BWAS_comment/venv/lib/python3.10/site-packages/plotly/express/_core.py:1990, in make_figure(args, constructor, trace_patch, layout_patch) 1987 layout_patch = layout_patch or {} 1988 apply_default_cascade(args) -> 1990 args = build_dataframe(args, constructor) 1991 if constructor in [go.Treemap, go.Sunburst, go.Icicle] and args["path"] is not None: 1992 args = process_dataframe_hierarchy(args) File ~/src/BWAS_comment/venv/lib/python3.10/site-packages/plotly/express/_core.py:1405, in build_dataframe(args, constructor) 1402 args["color"] = None 1403 # now that things have been prepped, we do the systematic rewriting of `args` -> 1405 df_output, wide_id_vars = process_args_into_dataframe( 1406 args, wide_mode, var_name, value_name 1407 ) 1409 # now that `df_output` exists and `args` contains only references, we complete 1410 # the special-case and wide-mode handling by further rewriting args and/or mutating 1411 # df_output 1413 count_name = _escape_col_name(df_output, "count", [var_name, value_name]) File ~/src/BWAS_comment/venv/lib/python3.10/site-packages/plotly/express/_core.py:1207, in process_args_into_dataframe(args, wide_mode, var_name, value_name) 1205 if argument == "index": 1206 err_msg += "\n To use the index, pass it in directly as `df.index`." -> 1207 raise ValueError(err_msg) 1208 elif length and len(df_input[argument]) != length: 1209 raise ValueError( 1210 "All arguments should have the same length. " 1211 "The length of column argument `df[%s]` is %d, whereas the " (...) 1218 ) 1219 ) ValueError: Value of 'hover_data_1' is not the name of a column in 'data_frame'. Expected one of [0, 'r_discovery', 'category', 'phenotype'] but received: phenotype_long
import plotly
import plotly.graph_objs as go
colors = ["#de324c","#f4895f","#f8e16f","#95cf92","#9656a2","#369acc"]
cols = [ colors[c] for c in pd.Categorical(df_plot['category']).codes]
df_plot['category'] = pd.Categorical(df_plot['category'])
df_plot['category'].cat.reorder_categories(['Emotion', 'Sensory', 'Motor', 'Alertness', 'Personality', 'Cognition'], inplace= True)
df_plot = df_plot.sort_values(by=['category', 'r_discovery'], ascending=False)
trace1l = go.Scatter( name='',
x=df_plot['phenotype'].values,
y=[80] * len(df_plot['phenotype'].values),
mode='lines',
line = dict(color='gray', width=1, dash='dash'),
showlegend=False,
hovertemplate="Replication threshold"
)
trace1 = go.Bar(x=df_plot['phenotype'].values,y=df_plot[0].values, name='',
marker_color=cols, showlegend=False,
customdata=df_plot.values,
#texttemplate="%{y:0.0f}%",
textposition="outside",
textangle=0,
textfont_color="gray",
hovertemplate="<br>".join([
"Domain: %{customdata[2]}",
"Effect Size: r=%{customdata[1]:.2f}",
"Replication Probability: %{customdata[0]:.0f}%",
"Description: %{customdata[4]}"
])
#hovertext=pd.Categorical(df_plot['category'])
)
trace2l = go.Scatter( name='',
x=df_plot['phenotype'].values,
y=[0.074] * len(df_plot['phenotype'].values),
mode='lines',
line = dict(color='gray', width=1, dash='dash'),
showlegend=False,
hovertemplate="Significance threshold",
visible=False
)
trace2 = go.Bar(x=df_plot['phenotype'].values,y=df_plot['r_discovery'], visible=False, name='',
marker_color=cols, showlegend=False,
customdata=df_plot.values,
#texttemplate="%{y:0.2f}",
textposition="outside",
textangle=0,
textfont_color="gray",
hovertemplate="<br>".join([
"Domain: %{customdata[2]}",
"Effect Size: r=%{customdata[1]:.2f}",
"Replication Probability: %{customdata[0]:.0f}%",
"Description: %{customdata[4]}"
])
)
data = [trace1l, trace1, trace2l, trace2]
updatemenus = list([
dict(active=0,
showactive = True,
buttons=list([
dict(label = "Replication Probability",
method = "update",
args = [{"visible": [True, True, False, False]}]), # hide trace2
dict(label = "Predictive Effect Size",
method = "update",
args = [{"visible": [False, False, True, True]}]) # hide trace1
]),
x = 0,
xanchor = 'left',
y = 1.5,
yanchor = 'top',
)])
layout = dict(title="",
showlegend=True,
xaxis=dict(title="Phenotype",
showgrid=False, # thin lines in the background
#showticklabels=False, # numbers below),
zeroline=True # thick line at x=0
),
updatemenus=updatemenus,
template='simple_white',
font=dict(
size=10,
color="gray"),
autosize=True,
height=260,
margin=dict(
l=5,
r=5,
b=10,
t=10,
pad=4
),
)
fig=dict(data=data, layout=layout)
#plotly.offline.plot(fig)
#go.Figure(fig).show()
import plotly.io as pio
pio.write_html(fig, file='docs/_includes/figure.html', auto_open=True)
/tmp/ipykernel_19024/1079767499.py:9: FutureWarning: The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.
for i in df_plot.index:
df_plot.loc[i, 'phenotype_long'] = phenotypes.loc[phenotypes.columnHeader==i, 'fullDisplayName'].values[0]
df_plot
0 | r_discovery | category | phenotype | phenotype_long | |
---|---|---|---|---|---|
target | |||||
CogCrystalComp_AgeAdj | 100.000000 | 0.509356 | Cognition | CogCrystalComp_AgeAdj | NIH Toolbox Cognition Crystallized Composite: ... |
PicVocab_AgeAdj | 100.000000 | 0.474802 | Cognition | PicVocab_AgeAdj | NIH Toolbox Picture Vocabulary Test: Age-Adjus... |
CogTotalComp_AgeAdj | 100.000000 | 0.472652 | Cognition | CogTotalComp_AgeAdj | NIH Toolbox Cognition Total Composite Score: A... |
ReadEng_AgeAdj | 100.000000 | 0.450867 | Cognition | ReadEng_AgeAdj | NIH Toolbox Oral Reading Recognition Test: Age... |
CogEarlyComp_AgeAdj | 100.000000 | 0.350107 | Cognition | CogEarlyComp_AgeAdj | NIH Toolbox Cognition Early Childhood Composit... |
VSPLOT_TC | 100.000000 | 0.330419 | Cognition | VSPLOT_TC | Variable Short Penn Line Orientation: Total Nu... |
CogFluidComp_AgeAdj | 100.000000 | 0.304980 | Cognition | CogFluidComp_AgeAdj | NIH Toolbox Cognition Fluid Composite: Age Adj... |
PMAT24_A_CR | 100.000000 | 0.292243 | Cognition | PMAT24_A_CR | Penn Progressive Matrices: Number of Correct R... |
ListSort_AgeAdj | 100.000000 | 0.272622 | Cognition | ListSort_AgeAdj | NIH Toolbox List Sorting Working Memory Test: ... |
DDisc_AUC_200 | 100.000000 | 0.208845 | Cognition | DDisc_AUC_200 | Delay Discounting: Area Under the Curve for Di... |
PicSeq_AgeAdj | 100.000000 | 0.182331 | Cognition | PicSeq_AgeAdj | NIH Toolbox Picture Sequence Memory Test: Age-... |
CardSort_AgeAdj | 100.000000 | 0.178966 | Cognition | CardSort_AgeAdj | NIH Toolbox Dimensional Change Card Sort Test:... |
SCPT_SPEC | 100.000000 | 0.148358 | Cognition | SCPT_SPEC | Short Penn Continuous Performance Test: Specif... |
Flanker_AgeAdj | 98.876404 | 0.132261 | Cognition | Flanker_AgeAdj | NIH Toolbox Flanker Inhibitory Control and Att... |
ProcSpeed_AgeAdj | 84.615385 | 0.117641 | Cognition | ProcSpeed_AgeAdj | NIH Toolbox Pattern Comparison Processing Spee... |
IWRD_TOT | 36.538462 | 0.081018 | Cognition | IWRD_TOT | Penn Word Memory Test: Total Number of Correct... |
SCPT_SEN | 0.000000 | 0.004953 | Cognition | SCPT_SEN | Short Penn Continuous Performance Test: Sensit... |
NEOFAC_O | 100.000000 | 0.265051 | Personality | NEOFAC_O | NEO-FFI Openness to Experience (NEOFAC_O) |
NEOFAC_A | 100.000000 | 0.192264 | Personality | NEOFAC_A | NEO-FFI Agreeableness (NEOFAC_A) |
NEOFAC_C | 100.000000 | 0.179737 | Personality | NEOFAC_C | NEO-FFI Conscientiousness (NEOFAC_C) |
NEOFAC_E | 98.936170 | 0.157920 | Personality | NEOFAC_E | NEO-FFI Extraversion (NEOFAC_E) |
NEOFAC_N | 40.000000 | 0.073143 | Personality | NEOFAC_N | NEO-FFI Neuroticism (NEOFAC_N) |
PSQI_Score | 98.850575 | 0.142913 | Alertness | PSQI_Score | Sleep (Pittsburgh Sleep Questionnaire) Total S... |
MMSE_Score | 89.873418 | 0.112352 | Alertness | MMSE_Score | Mini Mental Status Exam Total Score |
Dexterity_AgeAdj | 98.924731 | 0.169838 | Motor | Dexterity_AgeAdj | NIH Toolbox 9-hole Pegboard Dexterity Test : A... |
Noise_Comp | 97.674419 | 0.137544 | Sensory | Noise_Comp | NIH Toolbox Words-In-Noise Age 6+: Computed Score |
Taste_AgeAdj | 43.103448 | 0.094419 | Sensory | Taste_AgeAdj | NIH Toolbox Regional Taste Intensity Age 12+ A... |
Mars_Final | 6.666667 | 0.081530 | Sensory | Mars_Final | Mars Final Contrast Sensitivity Score |
Odor_AgeAdj | 0.000000 | 0.034223 | Sensory | Odor_AgeAdj | NIH Toolbox Odor Identification Age 3+ Age-Adj... |
PainInterf_Tscore | 0.000000 | 0.032742 | Sensory | PainInterf_Tscore | NIH Toolbox Pain Interference Survey Age 18+: ... |
LifeSatisf_Unadj | 100.000000 | 0.198733 | Emotion | LifeSatisf_Unadj | NIH Toolbox General Life Satisfaction Survey: ... |
AngAggr_Unadj | 100.000000 | 0.194056 | Emotion | AngAggr_Unadj | NIH Toolbox Anger-Physical Aggression Survey: ... |
FearAffect_Unadj | 100.000000 | 0.155632 | Emotion | FearAffect_Unadj | NIH Toolbox Fear-Affect Survey: Unadjusted Sca... |
Sadness_Unadj | 94.047619 | 0.132782 | Emotion | Sadness_Unadj | NIH Toolbox Sadness Survey: Unadjusted Scale S... |
EmotSupp_Unadj | 96.551724 | 0.126029 | Emotion | EmotSupp_Unadj | NIH Toolbox Emotional Support Survey: Unadjust... |
Loneliness_Unadj | 71.666667 | 0.101812 | Emotion | Loneliness_Unadj | NIH Toolbox Loneliness Survey: Unadjusted Scal... |
PercStress_Unadj | 62.500000 | 0.100211 | Emotion | PercStress_Unadj | NIH Toolbox Perceived Stress Survey: Unadjuste... |
SelfEff_Unadj | 81.818182 | 0.099904 | Emotion | SelfEff_Unadj | NIH Toolbox Self-Efficacy Survey: Unadjusted S... |
MeanPurp_Unadj | 50.769231 | 0.099847 | Emotion | MeanPurp_Unadj | NIH Toolbox Meaning and Purpose Survey: Unadju... |
PosAffect_Unadj | 55.223881 | 0.098838 | Emotion | PosAffect_Unadj | NIH Toolbox Positive Affect Survey: Unadjusted... |
AngAffect_Unadj | 69.230769 | 0.096750 | Emotion | AngAffect_Unadj | NIH Toolbox Anger-Affect Survey: Unadjusted Sc... |
Friendship_Unadj | 44.000000 | 0.085313 | Emotion | Friendship_Unadj | NIH Toolbox Friendship Survey: Unadjusted Scal... |
ER40ANG | 44.680851 | 0.072909 | Emotion | ER40ANG | Penn Emotion Recognition Test: Number of Corre... |
AngHostil_Unadj | 41.025641 | 0.063366 | Emotion | AngHostil_Unadj | NIH Toolbox Anger-Hostility Survey: Unadjusted... |
FearSomat_Unadj | 14.285714 | 0.061101 | Emotion | FearSomat_Unadj | NIH Toolbox Fear-Somatic Arousal Survey: Unadj... |
ER40NOE | 16.666667 | 0.050150 | Emotion | ER40NOE | Penn Emotion Recognition Test: Number of Corre... |
InstruSupp_Unadj | 4.166667 | 0.041920 | Emotion | InstruSupp_Unadj | NIH Toolbox Instrumental Support Survey: Unadj... |
PercHostil_Unadj | 0.000000 | 0.037622 | Emotion | PercHostil_Unadj | NIH Toolbox Perceived Hostility Survey: Unadju... |
PercReject_Unadj | 4.347826 | 0.036887 | Emotion | PercReject_Unadj | NIH Toolbox Perceived Rejection Survey: Unadju... |
ER40FEAR | 0.000000 | 0.033788 | Emotion | ER40FEAR | Penn Emotion Recognition Test: Number of Corre... |
ER40SAD | 10.000000 | 0.033559 | Emotion | ER40SAD | Penn Emotion Recognition Test: Number of Corre... |
ER40HAP | NaN | -0.059106 | Emotion | ER40HAP | Penn Emotion Recognition Test: Number of Corre... |
tmp = behavior[['CogTotalComp_AgeAdj', 'CogFluidComp_AgeAdj', 'PMAT24_A_CR', 'Flanker_AgeAdj', 'CardSort_AgeAdj', 'PicSeq_AgeAdj']]
tmp.corr()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[9], line 1 ----> 1 tmp = behavior[['CogTotalComp_AgeAdj', 'CogFluidComp_AgeAdj', 'PMAT24_A_CR', 'Flanker_AgeAdj', 'CardSort_AgeAdj', 'PicSeq_AgeAdj']] 2 tmp.corr() NameError: name 'behavior' is not defined
import plotly.graph_objects as go
from plotly.subplots import make_subplots
labels = ["Replicable", "Not Replicable"]
# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])
fig.add_trace(go.Pie(labels=labels, values=[15+4+2+1+1, 2+1+0+0+4], name="Trait-like", marker_colors=['green', 'red'], rotation=-90),
1, 1)
fig.add_trace(go.Pie(labels=labels, values=[6, 17], name="State-like", rotation=90),
1, 2)
# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.5, hoverinfo="label+percent+name")
fig.update_layout(
template='simple_white',
title_text="",
font=dict(
size=10,
color="gray"),
autosize=True,
height=70,
width=500,
margin=dict(
l=5,
r=5,
b=10,
t=10,
pad=4),
legend=dict(
yanchor="top",
y=0.99,
xanchor="left",
x=-0.4,
font_color='black'
),
# Add annotations in the center of the donut pies.
annotations=[dict(text='Trait-like', x=0.0, y=1.1, font_size=12, font_color='black', showarrow=False),
dict(text='State-like', x=0.72, y=1.1, font_size=12, font_color='black', showarrow=False)])
fig.show()
import plotly.io as pio
pio.write_html(fig, file='docs/_includes/pie.html', auto_open=False)