The HCP dataset comprises task-based fMRI from a large sample of human subjects. The NMA-curated dataset includes time series data that has been preprocessed and spatially-downsampled by aggregating within 360 regions of interest.
In order to use this dataset, please electronically sign the HCP data use terms at ConnectomeDB. Instructions for this are on pp. 24-25 of the HCP Reference Manual.
In this notebook, NMA provides code for downloading the data and doing some basic visualisation and processing.
# @title Install dependencies
# Quietly install the analysis/plotting packages this notebook relies on.
!pip install pandas --quiet
!pip install seaborn --quiet
!pip install nilearn --quiet
|████████████████████████████████| 9.6 MB 3.5 MB/s
|████████████████████████████████| 38.1 MB 1.2 MB/s
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from nilearn import plotting, datasets
# @title Figure settings
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Apply the NeuroMatch Academy matplotlib style sheet to all figures
plt.style.use("https://raw.githubusercontent.com/NeuromatchAcademy/course-content/main/nma.mplstyle")
# The download cells will store the data in nested directories starting here:
HCP_DIR = "./hcp"
# makedirs with exist_ok=True avoids the racy isdir-then-mkdir check and
# also works if HCP_DIR is ever changed to a nested path.
os.makedirs(HCP_DIR, exist_ok=True)

# The data shared for NMA projects is a subset of the full HCP dataset
N_SUBJECTS = 339

# The data have already been aggregated into ROIs from the Glasser parcellation
N_PARCELS = 360

# The acquisition parameters for all tasks were identical
TR = 0.72  # Time resolution, in seconds

# The parcels are matched across hemispheres with the same order
HEMIS = ["Right", "Left"]

# Each experiment was repeated twice in each subject
N_RUNS = 2

# There are 7 tasks. Each has a number of 'conditions'.
# 'runs' holds the BOLD run numbers used in the timeseries file names;
# 'cond' lists the condition (EV) names for that task.
EXPERIMENTS = {
    'MOTOR'      : {'runs': [5, 6],   'cond': ['lf', 'rf', 'lh', 'rh', 't', 'cue']},
    'WM'         : {'runs': [7, 8],   'cond': ['0bk_body', '0bk_faces', '0bk_places', '0bk_tools',
                                               '2bk_body', '2bk_faces', '2bk_places', '2bk_tools']},
    'EMOTION'    : {'runs': [9, 10],  'cond': ['fear', 'neut']},
    'GAMBLING'   : {'runs': [11, 12], 'cond': ['loss', 'win']},
    'LANGUAGE'   : {'runs': [13, 14], 'cond': ['math', 'story']},
    'RELATIONAL' : {'runs': [15, 16], 'cond': ['match', 'relation']},
    'SOCIAL'     : {'runs': [17, 18], 'cond': ['mental', 'rnd']}
}

# You may want to limit the subjects used during code development.
# This will use all subjects:
subjects = range(N_SUBJECTS)
For a detailed description of the tasks, have a look at pages 45-54 of the HCP reference manual.
The task data are shared in different files, but they will unpack into the same directory structure.
# @title Download the data
import os, requests, tarfile

# Fetch the NMA-curated HCP task archive from OSF, skipping the
# download when the file is already present locally.
fname = "hcp_task.tgz"
url = "https://osf.io/s4h8j/download/"

if not os.path.isfile(fname):
    try:
        r = requests.get(url)
    except requests.ConnectionError:
        # Network-level failure (DNS, refused connection, no route)
        print("!!! Failed to download data !!!")
    else:
        if r.status_code != requests.codes.ok:
            # Server responded, but not with 200 OK
            print("!!! Failed to download data !!!")
        else:
            # NOTE(review): requests.get above has already transferred the
            # payload, so this message prints once the download is complete.
            print(f"Downloading {fname}...")
            with open(fname, "wb") as fid:
                fid.write(r.content)
            print(f"Download {fname} completed!")
Downloading hcp_task.tgz... Download hcp_task.tgz completed!
# @title Extract the data in `HCP_DIR`
fname_ex = "hcp_task"
path_name = os.path.join(HCP_DIR, fname_ex)

# Only unpack the archive when the target directory is not there yet.
# NOTE(review): tarfile.extractall trusts the archive's member paths;
# fine for this curated OSF file, but do not reuse on untrusted archives.
if os.path.exists(path_name):
    print(f"File {fname_ex}.tgz has already been extracted.")
else:
    print(f"Extracting {fname_ex}.tgz...")
    with tarfile.open(f"{fname_ex}.tgz") as fzip:
        fzip.extractall(HCP_DIR)
Extracting hcp_task.tgz...
Extracting this dataset will create the regions.npy
file, which contains the region name and network assignment for each parcel.
Detailed information about the name used for each region is provided in the Supplement to Glasser et al. 2016.
Information about the network parcellation is provided in Ji et al, 2019.
# Load the parcel metadata: row 0 = region names, row 1 = network labels.
regions = np.load(os.path.join(HCP_DIR, "hcp_task", "regions.npy")).T

# Parcels are ordered right hemisphere first, then left (half each).
half = N_PARCELS // 2
region_info = dict(
    name=regions[0].tolist(),
    network=regions[1],
    hemi=['Right'] * half + ['Left'] * half,
)
We provide two helper functions: one for loading the time series from a single subject and a single run, and one for loading an EV file for each task.
An EV file (EV:Explanatory Variable) describes the task experiment in terms of stimulus onset, duration, and amplitude. These can be used to model the task time series data.
def load_single_timeseries(subject, experiment, run, dir, remove_mean=True):
    """Load timeseries data for a single subject and single run.

    Args:
      subject (int): 0-based subject ID to load
      experiment (str): Name of experiment (key into EXPERIMENTS)
      run (int): 0-based run index within this experiment
      dir (str): Root directory of the extracted HCP task data
      remove_mean (bool): If True, subtract the parcel-wise mean
        (typically the mean BOLD signal is not of interest)

    Returns:
      ts (n_parcel x n_timepoint array): Array of BOLD data values
    """
    # Map the 0-based run index to the BOLD run number used in file names
    run_number = EXPERIMENTS[experiment]['runs'][run]
    bold_file = os.path.join(dir, "subjects", str(subject), "timeseries",
                             f"bold{run_number}_Atlas_MSMAll_Glasser360Cortical.npy")
    ts = np.load(bold_file)
    if remove_mean:
        # Center each parcel's timeseries in place
        ts -= ts.mean(axis=1, keepdims=True)
    return ts
def load_evs(subject, experiment, run, dir):
    """Load EVs (explanatory variables) data for one task experiment.

    Args:
      subject (int): 0-based subject ID to load
      experiment (str): Name of experiment (key into EXPERIMENTS)
      run (int): 0-based run index (0 -> 'RL' session, 1 -> 'LR')
      dir (str): Root directory of the extracted HCP task data

    Returns:
      frames_list (list of lists): Per condition, a list of frame-index
        arrays, one array per trial.
    """
    task_key = f"tfMRI_{experiment}_{['RL', 'LR'][run]}"
    ev_dir = os.path.join(dir, "subjects", str(subject), "EVs", task_key)

    frames_list = []
    for cond in EXPERIMENTS[experiment]['cond']:
        # Each EV file holds one row per trial: onset, duration, amplitude
        ev_array = np.loadtxt(os.path.join(ev_dir, f"{cond}.txt"),
                              ndmin=2, unpack=True)
        ev = dict(zip(["onset", "duration", "amplitude"], ev_array))
        # First frame of each trial, rounded down
        start = np.floor(ev["onset"] / TR).astype(int)
        # Number of frames each trial spans, rounded up
        n_frames = np.ceil(ev["duration"] / TR).astype(int)
        # Frame ranges covering every trial of this condition
        frames_list.append([first + np.arange(n)
                            for first, n in zip(start, n_frames)])
    return frames_list
Let's load the timeseries data for the MOTOR experiment from a single subject and a single run
# Load one example timeseries: MOTOR task, first subject, second run.
my_exp = 'MOTOR'  # task name (key into EXPERIMENTS)
my_subj = 0       # 0-based subject index
my_run = 1        # 0-based run index
data = load_single_timeseries(subject=my_subj,
                              experiment=my_exp,
                              run=my_run,
                              dir=os.path.join(HCP_DIR, "hcp_task"),
                              remove_mean=True)
# (n_parcels, n_timepoints)
print(data.shape)
(360, 284)
As you can see the time series data contains 284 time points in 360 regions of interest (ROIs).
Now in order to understand how to model these data, we need to relate the time series to the experimental manipulation. This is described by the EV files. Let us load the EVs for this experiment.
# Load the condition timing (EV) frame indices for the same subject/run
evs = load_evs(subject=my_subj, experiment=my_exp,run=my_run, dir=os.path.join(HCP_DIR, "hcp_task"))
For the motor task, this evs variable contains a list of 6 arrays corresponding to the 6 conditions.
Now let's use these evs to compare the average activity during the left foot ('lf') and right foot ('rf') conditions:
# Helper that averages the BOLD signal over all frames of one condition
def average_frames(data, evs, experiment, cond):
    """Return the per-parcel activity for `cond`, averaged over trials.

    Each trial is first averaged over its own frames, then the trial
    means are averaged, giving one value per parcel.
    """
    idx = EXPERIMENTS[experiment]['cond'].index(cond)
    trial_means = [np.mean(data[:, frames], axis=1, keepdims=True)
                   for frames in evs[idx]]
    return np.mean(np.concatenate(trial_means, axis=-1), axis=1)
lf_activity = average_frames(data, evs, my_exp, 'lf')
rf_activity = average_frames(data, evs, my_exp, 'rf')
contrast = lf_activity - rf_activity  # left-foot minus right-foot activity

# Plot the per-ROI activity level for both foot-movement conditions
plt.figure()
for activity, label in ((lf_activity, 'left foot'),
                        (rf_activity, 'right foot')):
    plt.plot(activity, label=label)
plt.xlabel('ROI')
plt.ylabel('activity')
plt.legend()
plt.show()
Now let's plot these activity vectors. We will also make use of the ROI names to find out which brain areas show highest activity in these conditions. But since there are so many areas, we will group them by network.
A powerful tool for organising and plotting this data is the combination of pandas and seaborn. Below is an example where we use pandas to create a table for the activity data and we use seaborn to visualise it.
# Tabulate the two activity vectors with their network/hemisphere labels
df = pd.DataFrame({'lf_activity': lf_activity,
                   'rf_activity': rf_activity,
                   'network': region_info['network'],
                   'hemi': region_info['hemi']})

# One panel per condition, activity grouped by network and hemisphere
fig, axes = plt.subplots(1, 2)
for ax, column in zip(axes, ('lf_activity', 'rf_activity')):
    sns.barplot(y='network', x=column, data=df, hue='hemi', ax=ax)
fig.show()
You should be able to notice that for the somatosensory network, brain activity in the right hemi is higher for the left foot movement and vice versa for the left hemi and right foot. But this may be subtle at the single subject/session level (these are quick 3-4 min scans).
Let us boost the stats by averaging across all subjects and runs.
# Accumulate the lf-rf contrast over every subject and both runs, then
# divide by the number of (subject, run) pairs to get the group average.
group_contrast = 0
task_dir = os.path.join(HCP_DIR, "hcp_task")
for s in subjects:
    for r in [0, 1]:
        data = load_single_timeseries(subject=s, experiment=my_exp, run=r,
                                      dir=task_dir, remove_mean=True)
        evs = load_evs(subject=s, experiment=my_exp, run=r, dir=task_dir)
        lf_activity = average_frames(data, evs, my_exp, 'lf')
        rf_activity = average_frames(data, evs, my_exp, 'rf')
        contrast = lf_activity - rf_activity
        group_contrast += contrast
group_contrast /= (len(subjects) * 2)  # remember: 2 sessions per subject
# Tabulate the group contrast with its network/hemisphere labels
df = pd.DataFrame({'contrast': group_contrast,
                   'network': region_info['network'],
                   'hemi': region_info['hemi']})

# The left-minus-right-foot contrast needs only a single panel
plt.figure()
sns.barplot(y='network', x='contrast', data=df, hue='hemi')
plt.show()
Finally, we will visualise these results on the cortical surface of an average brain.
# @title Download `atlas.npz`
import os, requests, tarfile

# Fetch the surface atlas (parcel labels per vertex) from OSF, skipping
# the download when the file is already present locally.
fname = "atlas.npz"
url = "https://osf.io/j5kuc/download"
if not os.path.isfile(fname):
    try:
        r = requests.get(url)
    except requests.ConnectionError:
        # Network-level failure (DNS, refused connection, no route)
        print("!!! Failed to download data !!!")
    else:
        if r.status_code != requests.codes.ok:
            # Server responded, but not with 200 OK
            print("!!! Failed to download data !!!")
        else:
            # NOTE(review): requests.get above has already transferred the
            # payload, so this message prints once the download is complete.
            print(f"Downloading {fname}...")
            with open(fname, "wb") as fid:
                fid.write(r.content)
            print(f"Download {fname} completed!")
Downloading atlas.npz... Download atlas.npz completed!
# This uses the nilearn package
# Load the atlas archive (fname is "atlas.npz" from the download cell above)
with np.load(fname) as dobj:
    atlas = dict(**dobj)  # unpack the npz archive into a plain dict

# Expand the 360-parcel group contrast to one value per surface vertex via
# the left-hemisphere label array, then render it on an inflated fsaverage
# surface. Only the left hemisphere is shown here; use "labels_R" and
# 'infl_right' for the right hemisphere.
fsaverage = datasets.fetch_surf_fsaverage()
surf_contrast = group_contrast[atlas["labels_L"]]
plotting.view_surf(fsaverage['infl_left'],
                   surf_contrast,
                   vmax=20)