#!/usr/bin/env python # coding: utf-8 # # Analysis of the ABIDE dataset # # ### Imports # In[1]: import warnings import os from os.path import join import numpy as np import pandas as pd from scipy.stats import kurtosis, skew import matplotlib.pyplot as plt import seaborn as sns sns.set_style("whitegrid") from sklearn.model_selection import StratifiedKFold, GridSearchCV from sklearn.pipeline import Pipeline from sklearn.feature_selection import VarianceThreshold from sklearn.linear_model import LogisticRegression from sklearn.preprocessing import quantile_transform from sklearn.metrics import roc_curve, RocCurveDisplay from nilearn.datasets.utils import _uncompress_file, _fetch_file from nilearn.connectome import ConnectivityMeasure from neurocombat_sklearn import CombatModel import statsmodels.api as sm from statsmodels.regression.linear_model import OLS from statsmodels.formula.api import ols as ols_f from mlconfound.stats import full_confound_test, partial_confound_test from mlconfound.plot import plot_graph from mlconfound.stats import _r2_cat_cont, _r2_cont_cont, _r2_cat_cat # ## Load data # In[2]: data_dir = '../data_in/ABIDE' url = 'https://osf.io/hc4md/download' # Download the zip file, first dl_file = _fetch_file(url, data_dir=data_dir) # Second, uncompress the downloaded zip file _uncompress_file(dl_file, verbose=2) # In[3]: def _get_paths(phenotypic, atlas, timeseries_dir): """ """ timeseries = [] IDs_subject = [] diagnosis = [] subject_ids = phenotypic['SUB_ID'] mean_fd = [] num_fd = [] perc_fd = [] site = [] for index, subject_id in enumerate(subject_ids): this_pheno = phenotypic[phenotypic['SUB_ID'] == subject_id] this_timeseries = join(timeseries_dir, atlas, str(subject_id) + '_timeseries.txt') if os.path.exists(this_timeseries): timeseries.append(np.loadtxt(this_timeseries)) IDs_subject.append(subject_id) diagnosis.append(this_pheno['DX_GROUP'].values[0]) mean_fd.append(this_pheno['func_mean_fd'].values[0]) num_fd.append(this_pheno['func_num_fd'].values[0]) perc_fd.append(this_pheno['func_perc_fd'].values[0]) site.append(this_pheno['SITE_ID'].values[0]) return timeseries, diagnosis, IDs_subject, mean_fd, num_fd, perc_fd, site # Download the phenotypic summary information file form the preprocessed connectomes project. # - First read: # http://preprocessed-connectomes-project.org/abide/download.html # - Then download: # https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative/Phenotypic_V1_0b_preprocessed1.csv # - Copy the csv file into the data_in/ABIDE directory # In[4]: phenotypic = pd.read_csv('../data_in/ABIDE/Phenotypic_V1_0b_preprocessed1.csv').iloc[:,2:] timeseries, diagnosis, IDs_subject, mean_fd, num_fd, perc_fd, site = _get_paths(phenotypic, "BASC/regions", '../data_in/ABIDE/') sites, site_int = np.unique(site, return_inverse=True) phenotypic # In[5]: sns.histplot(mean_fd, color='gray') plt.savefig('../data_out/fig/abide_motion_hist.pdf') kurtosis(mean_fd), skew(mean_fd) # In[6]: sns.histplot(np.log(mean_fd), color='gray') kurtosis(np.log(mean_fd)), skew(np.log(mean_fd)) # In[7]: rng = np.random.default_rng(42) mean_fd_trf = quantile_transform(np.array([mean_fd]).T, output_distribution='normal', n_quantiles=len(mean_fd)).flatten() sns.histplot(mean_fd_trf, color='gray') plt.savefig('../data_out/fig/abide_motion_quanttrf_hist.pdf') kurtosis(mean_fd_trf), skew(mean_fd_trf) mean_fd = mean_fd_trf # ## Binning motion data (to be used later) # In[8]: # binning mean_fd bins = 10 # approximately 80 subject per motion group limits = np.quantile(mean_fd, np.arange(0, 1, 1/bins)) mean_fd_binned = np.digitize(mean_fd, limits) # ## Calculate connectivity # In[9]: connections = ConnectivityMeasure(kind='tangent', vectorize=True, discard_diagonal=True) conn_coefs = connections.fit_transform(timeseries) # In[10]: _, y = np.unique(diagnosis, return_inverse=True) X = conn_coefs # # Machine learning (raw) # In[11]: outer_cv = StratifiedKFold(10) inner_cv = StratifiedKFold(10) model = Pipeline([ ('varthr', VarianceThreshold(0)), # omit zero variance columns (diagonal) ('model', LogisticRegression())]) p_grid = {'model__C': [0.1, 1, 10]} clf = GridSearchCV(estimator=model, param_grid=p_grid, cv=StratifiedKFold(10), scoring="roc_auc", return_train_score=False, n_jobs=-1) all_models = [] best_params = [] predicted = np.zeros(len(y)) predicted_prob = np.zeros(len(y)) nested_scores_train = np.zeros(outer_cv.get_n_splits(X)) nested_scores_test = np.zeros(outer_cv.get_n_splits(X)) print("model\tinner_cv mean score\touter vc score") i=0 for train, test in outer_cv.split(X, y): clf.fit(X[train], y[train]) print('cv:', i, str(clf.best_params_) + " " + str(clf.best_score_) + " " + str(clf.score(X[test], y[test]))) all_models.append(clf.best_estimator_) best_params.append(clf.best_params_) predicted[test] = clf.predict(X[test]) predicted_prob[test] = clf.predict_proba(X[test])[:,0] nested_scores_train[i] = clf.best_score_ nested_scores_test[i] = clf.score(X[test], y[test]) i = i+1 # In[12]: print("** Mean score in the inner crossvaludation (inner_cv):\t" + str(nested_scores_train.mean())) print("** Mean Nested Crossvalidation Score (outer_cv):\t" + str(nested_scores_test.mean())) fpr, tpr, _ = roc_curve(y, predicted_prob, pos_label=0) fig, ax = plt.subplots(figsize=(5,2)) RocCurveDisplay(fpr=fpr, tpr=tpr).plot(ax=ax) plt.savefig('../data_out/fig/abide_raw_rocplot.pdf') # In[13]: plt.figure(figsize=(5,1.5)) pal=[sns.color_palette("coolwarm", 10)[-1], sns.color_palette("coolwarm", 10)[0]] sns.stripplot(x=site, y=predicted_prob, hue=y, palette=pal, alpha=0.2, jitter=0.2, dodge=True) ax=sns.boxplot(x=site, y=predicted_prob, hue=y, showfliers = False) for box in ax.artists: box.set_facecolor((1,1,1,0)) plt.xticks(rotation=90) for i in range(len(np.unique(site))): plt.axvline(i+0.5, color="gray", alpha=0.5, linewidth=0.5) plt.savefig('../data_out/fig/abide_site_raw_striplot.pdf') # In[14]: plt.figure(figsize=(5,2)) sns.stripplot(x=y, y=predicted_prob, hue=mean_fd_binned, palette="coolwarm", alpha=0.4, jitter=0.4, dodge=True) sns.boxplot(x=y, y=predicted_prob, color=(1,1,1,1)) plt.legend([],[], frameon=False) plt.savefig('../data_out/fig/abide_motion_raw_stripplot.pdf') # In[15]: plot_graph(partial_confound_test(y, predicted_prob, mean_fd, cat_y=True, cat_yhat=False, random_state=42), outfile_base='../data_out/fig/abide_motion_raw_partial') # In[16]: plot_graph(full_confound_test(y, predicted_prob, mean_fd, cat_y=True, cat_yhat=False, random_state=42), outfile_base='../data_out/fig/abide_motion_raw_full') # In[17]: nulldist = [] unpermuted = _r2_cont_cont(predicted_prob, mean_fd) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cont_cont(yperm, mean_fd)) (nulldist >= unpermuted).sum()/len(nulldist) # In[18]: nulldist = [] unpermuted = _r2_cat_cont(y, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(y, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # In[19]: plot_graph(partial_confound_test(y, predicted_prob, site_int, cat_y=True, cat_yhat=False, cat_c=True, random_state=42), outfile_base='../data_out/fig/abide_site_raw_partial') # In[20]: plot_graph(full_confound_test(y, predicted_prob, site_int, cat_y=True, cat_yhat=False, cat_c=True, random_state=42), outfile_base='../data_out/fig/abide_site_raw_full') # In[21]: nulldist = [] unpermuted = _r2_cat_cont(site_int, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(site_int, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # In[22]: nulldist = [] unpermuted = _r2_cat_cont(y, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(y, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # # Regress out motion from the feature # In[23]: # regress-out motion from connectivity X_adj = np.zeros_like(X) for i in range(X.shape[1]): OLS_model = OLS(X[:,i], sm.add_constant(mean_fd)).fit() # training the model X_adj[:, i] = OLS_model.resid # In[24]: outer_cv = StratifiedKFold(10) inner_cv = StratifiedKFold(10) model = Pipeline([ ('varthr', VarianceThreshold(0)), # omit zero variance columns (diagonal) #('fsel', SelectKBest(f_regression)), ('model', LogisticRegression())]) p_grid = {#'fsel__k': [500, 1000, 2000], 'model__C': [0.1, 1, 10]} clf = GridSearchCV(estimator=model, param_grid=p_grid, cv=StratifiedKFold(10), scoring="roc_auc", return_train_score=False, n_jobs=-1) all_models = [] best_params = [] predicted = np.zeros(len(y)) predicted_prob = np.zeros(len(y)) nested_scores_train = np.zeros(outer_cv.get_n_splits(X_adj)) nested_scores_test = np.zeros(outer_cv.get_n_splits(X_adj)) print("model\tinner_cv mean score\touter vc score") i=0 for train, test in outer_cv.split(X, y): clf.fit(X_adj[train], y[train]) print('cv:', i, str(clf.best_params_) + " " + str(clf.best_score_) + " " + str(clf.score(X_adj[test], y[test]))) all_models.append(clf.best_estimator_) best_params.append(clf.best_params_) predicted[test] = clf.predict(X_adj[test]) predicted_prob[test] = clf.predict_proba(X_adj[test])[:,0] nested_scores_train[i] = clf.best_score_ nested_scores_test[i] = clf.score(X_adj[test], y[test]) i = i+1 # In[25]: print("** Mean score in the inner crossvaludation (inner_cv):\t" + str(nested_scores_train.mean())) print("** Mean Nested Crossvalidation Score (outer_cv):\t" + str(nested_scores_test.mean())) fpr, tpr, _ = roc_curve(y, predicted_prob, pos_label=0) fig, ax = plt.subplots(figsize=(5,2)) RocCurveDisplay(fpr=fpr, tpr=tpr).plot(ax=ax) plt.savefig('../data_out/fig/abide_motion_reg_rocplot.pdf') # In[26]: plt.figure(figsize=(5,2)) sns.stripplot(x=y, y=predicted_prob, hue=mean_fd_binned, palette="coolwarm", alpha=0.4, jitter=0.4, dodge=True) sns.boxplot(x=y, y=predicted_prob, color=(1,1,1,1)) plt.legend([],[], frameon=False) plt.savefig('../data_out/fig/abide_motion_reg_stripplot.pdf') # In[27]: plot_graph(partial_confound_test(y, predicted_prob, mean_fd, cat_y=True, cat_yhat=False, random_state=42), outfile_base='../data_out/fig/abide_motion_reg_partial') # In[28]: plot_graph(full_confound_test(y, predicted_prob, mean_fd, cat_y=True, cat_yhat=False, random_state=42), outfile_base='../data_out/fig/abide_motion_reg_full') # In[29]: nulldist = [] unpermuted = _r2_cont_cont(predicted_prob, mean_fd) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cont_cont(yperm, mean_fd)) (nulldist >= unpermuted).sum()/len(nulldist) # In[30]: nulldist = [] unpermuted = _r2_cat_cont(y, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(y, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # # Regress out site # In[31]: # regress-out acquisition from connectivity X_adj = np.zeros_like(X) for i in range(X.shape[1]): tmp = pd.DataFrame({ 'x': site_int, 'y': X[:,i] }) OLS_model = ols_f("y ~ C(x)", tmp).fit() # training the model X_adj[:, i] = OLS_model.resid.values # In[32]: outer_cv = StratifiedKFold(10) inner_cv = StratifiedKFold(10) model = Pipeline([ ('varthr', VarianceThreshold(0)), # omit zero variance columns (diagonal) #('fsel', SelectKBest(f_regression)), ('model', LogisticRegression())]) p_grid = {#'fsel__k': [500, 1000, 2000], 'model__C': [0.1, 1, 10]} clf = GridSearchCV(estimator=model, param_grid=p_grid, cv=StratifiedKFold(10), scoring="roc_auc", return_train_score=False, n_jobs=-1) all_models = [] best_params = [] predicted = np.zeros(len(y)) predicted_prob = np.zeros(len(y)) nested_scores_train = np.zeros(outer_cv.get_n_splits(X_adj)) nested_scores_test = np.zeros(outer_cv.get_n_splits(X_adj)) print("model\tinner_cv mean score\touter vc score") i=0 for train, test in outer_cv.split(X, y): clf.fit(X_adj[train], y[train]) print('cv:', i, str(clf.best_params_) + " " + str(clf.best_score_) + " " + str(clf.score(X_adj[test], y[test]))) all_models.append(clf.best_estimator_) best_params.append(clf.best_params_) predicted[test] = clf.predict(X_adj[test]) predicted_prob[test] = clf.predict_proba(X_adj[test])[:,0] nested_scores_train[i] = clf.best_score_ nested_scores_test[i] = clf.score(X_adj[test], y[test]) i = i+1 # In[33]: print("** Mean score in the inner crossvaludation (inner_cv):\t" + str(nested_scores_train.mean())) print("** Mean Nested Crossvalidation Score (outer_cv):\t" + str(nested_scores_test.mean())) fpr, tpr, _ = roc_curve(y, predicted_prob, pos_label=0) fig, ax = plt.subplots(figsize=(5,2)) RocCurveDisplay(fpr=fpr, tpr=tpr).plot(ax=ax) plt.savefig('../data_out/fig/abide_site_reg_rocplot.pdf') # In[34]: plt.figure(figsize=(5,1.5)) pal=[sns.color_palette("coolwarm", 10)[-1], sns.color_palette("coolwarm", 10)[0]] sns.stripplot(x=site, y=predicted_prob, hue=y, palette=pal, alpha=0.2, jitter=0.2, dodge=True) ax=sns.boxplot(x=site, y=predicted_prob, hue=y, showfliers = False) for box in ax.artists: box.set_facecolor((1,1,1,0)) plt.xticks(rotation=90) for i in range(len(np.unique(site))): plt.axvline(i+0.5, color="gray", alpha=0.5, linewidth=0.5) plt.savefig('../data_out/fig/abide_site_reg_striplot.pdf') # In[35]: plot_graph(partial_confound_test(y, predicted_prob, site_int, cat_y=True, cat_yhat=False, cat_c=True, random_state=42), outfile_base='../data_out/fig/abide_site_reg_partial') # In[36]: plot_graph(full_confound_test(y, predicted_prob, site_int, cat_y=True, cat_yhat=False, cat_c=True, random_state=42), outfile_base='../data_out/fig/abide_site_reg_full') # In[37]: nulldist = [] unpermuted = _r2_cat_cont(site_int, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(site_int, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # In[38]: nulldist = [] unpermuted = _r2_cat_cont(y, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(y, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # # Combat on binned motion data # In[39]: outer_cv = StratifiedKFold(10) inner_cv = StratifiedKFold(10) model = Pipeline([ ('varthr', VarianceThreshold(0)), # omit zero variance columns (diagonal) #('fsel', SelectKBest(f_regression)), ('model', LogisticRegression())]) p_grid = {#'fsel__k': [500, 1000, 2000], 'model__C': [0.1, 1, 10]} clf = GridSearchCV(estimator=model, param_grid=p_grid, cv=StratifiedKFold(10), scoring="roc_auc", return_train_score=False, n_jobs=-1) all_models = [] best_params = [] predicted = np.zeros(len(y)) predicted_prob = np.zeros(len(y)) nested_scores_train = np.zeros(outer_cv.get_n_splits(X)) nested_scores_test = np.zeros(outer_cv.get_n_splits(X)) print("model\tinner_cv mean score\touter vc score") i=0 for train, test in outer_cv.split(X, y): comb = CombatModel() X_train_combat = comb.fit_transform(X[:,np.sum(X,0)!=0][train], np.array([mean_fd_binned[train]]).transpose() ) clf.fit(X_train_combat, y[train]) X_test_combat = comb.transform(X[:,np.sum(X,0)!=0][test], np.array([mean_fd_binned[test]]).transpose()) print('cv:', i, str(clf.best_params_) + " " + str(clf.best_score_) + " " + str(clf.score(X_test_combat, y[test]))) all_models.append(clf.best_estimator_) best_params.append(clf.best_params_) predicted[test] = clf.predict(X_test_combat) predicted_prob[test] = clf.predict_proba(X_test_combat)[:,0] nested_scores_train[i] = clf.best_score_ nested_scores_test[i] = clf.score(X_test_combat, y[test]) i = i+1 # In[40]: print("** Mean score in the inner crossvaludation (inner_cv):\t" + str(nested_scores_train.mean())) print("** Mean Nested Crossvalidation Score (outer_cv):\t" + str(nested_scores_test.mean())) fpr, tpr, _ = roc_curve(y, predicted_prob, pos_label=0) fig, ax = plt.subplots(figsize=(5,2)) RocCurveDisplay(fpr=fpr, tpr=tpr).plot(ax=ax) plt.savefig('../data_out/fig/abide_motion_comb_rocplot.pdf') # In[41]: plt.figure(figsize=(5,2)) sns.stripplot(x=y, y=predicted_prob, hue=mean_fd_binned, palette="coolwarm", alpha=0.4, jitter=0.4, dodge=True) plt.legend([],[], frameon=False) sns.boxplot(x=y, y=predicted_prob, color=(1,1,1,1)) plt.savefig('../data_out/fig/abide_motion_comb_stripplot.pdf') # In[42]: plot_graph(partial_confound_test(y, predicted_prob, mean_fd, cat_y=True, cat_yhat=False, random_state=42), outfile_base='../data_out/fig/abide_motion_comb_partial') # In[43]: plot_graph(full_confound_test(y, predicted_prob, mean_fd, cat_y=True, cat_yhat=False, random_state=42), outfile_base='../data_out/fig/abide_motion_comb_full') # In[44]: nulldist = [] unpermuted = _r2_cont_cont(predicted_prob, mean_fd) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cont_cont(yperm, mean_fd)) (nulldist >= unpermuted).sum()/len(nulldist) # In[45]: nulldist = [] unpermuted = _r2_cat_cont(y, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(y, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # # Combat on site # In[46]: outer_cv = StratifiedKFold(10, shuffle=True, random_state=42) inner_cv = StratifiedKFold(10, shuffle=True, random_state=42) model = Pipeline([ ('varthr', VarianceThreshold(0)), # omit zero variance columns (diagonal) #('fsel', SelectKBest(f_regression)), ('model', LogisticRegression())]) p_grid = {#'fsel__k': [500, 1000, 2000], 'model__C': [0.1, 1, 10]} clf = GridSearchCV(estimator=model, param_grid=p_grid, cv=StratifiedKFold(10), scoring="roc_auc", return_train_score=False, n_jobs=-1) all_models = [] best_params = [] predicted = np.zeros(len(y)) predicted_prob = np.zeros(len(y)) nested_scores_train = np.zeros(outer_cv.get_n_splits(X)) nested_scores_test = np.zeros(outer_cv.get_n_splits(X)) print("model\tinner_cv mean score\touter vc score") i=0 for train, test in outer_cv.split(X, y): comb = CombatModel() X_train_combat = comb.fit_transform(X[:,np.sum(X,0)!=0][train], np.array([site_int[train]]).transpose() ) clf.fit(X_train_combat, y[train]) X_test_combat = comb.transform(X[:,np.sum(X,0)!=0][test], np.array([site_int[test]]).transpose()) print('cv:', i, str(clf.best_params_) + " " + str(clf.best_score_) + " " + str(clf.score(X_test_combat, y[test]))) all_models.append(clf.best_estimator_) best_params.append(clf.best_params_) predicted[test] = clf.predict(X_test_combat) predicted_prob[test] = clf.predict_proba(X_test_combat)[:,0] nested_scores_train[i] = clf.best_score_ nested_scores_test[i] = clf.score(X_test_combat, y[test]) i = i+1 # In[47]: print("** Mean score in the inner crossvaludation (inner_cv):\t" + str(nested_scores_train.mean())) print("** Mean Nested Crossvalidation Score (outer_cv):\t" + str(nested_scores_test.mean())) fpr, tpr, _ = roc_curve(y, predicted_prob, pos_label=0) fig, ax = plt.subplots(figsize=(5,2)) RocCurveDisplay(fpr=fpr, tpr=tpr).plot(ax=ax) plt.savefig('../data_out/fig/abide_site_comb_rocplot.pdf') # In[48]: plt.figure(figsize=(5,1.5)) pal=[sns.color_palette("coolwarm", 10)[-1], sns.color_palette("coolwarm", 10)[0]] sns.stripplot(x=site, y=predicted_prob, hue=y, palette=pal, alpha=0.2, jitter=0.2, dodge=True) ax=sns.boxplot(x=site, y=predicted_prob, hue=y, showfliers = False) for box in ax.artists: box.set_facecolor((1,1,1,0)) plt.xticks(rotation=90) for i in range(len(np.unique(site))): plt.axvline(i+0.5, color="gray", alpha=0.5, linewidth=0.5) plt.savefig('../data_out/fig/abide_site_comb_striplot.pdf') # In[49]: plot_graph(partial_confound_test(y, predicted_prob, site_int, cat_y=True, cat_yhat=False, cat_c=True, random_state=42), outfile_base='../data_out/fig/abide_site_comb_partial') # In[50]: plot_graph(full_confound_test(y, predicted_prob, site_int, cat_y=True, cat_yhat=False, cat_c=True, random_state=42), outfile_base='../data_out/fig/abide_site_comb_full') # In[51]: nulldist = [] unpermuted = _r2_cat_cont(site_int, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(site_int, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # In[52]: nulldist = [] unpermuted = _r2_cat_cont(y, predicted_prob) print(unpermuted) for i in range(1000): yperm = np.random.permutation(predicted_prob) nulldist.append(_r2_cat_cont(y, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # In[53]: sns.color_palette("coolwarm", as_cmap=True) # In[54]: np.max(mean_fd) # In[54]: # In[55]: nulldist = [] unpermuted = _r2_cat_cont(y, mean_fd) print(unpermuted) for i in range(1000): yperm = np.random.permutation(y) nulldist.append(_r2_cat_cont(yperm, mean_fd)) (nulldist >= unpermuted).sum()/len(nulldist) # In[56]: nulldist = [] unpermuted = _r2_cat_cat(site_int, y) print(unpermuted) for i in range(1000): yperm = np.random.permutation(y) nulldist.append(_r2_cat_cat(site_int, yperm)) (nulldist >= unpermuted).sum()/len(nulldist) # In[56]: