Notebook

Cluster CellTypist L3 Myeloid cells¶

In [1]:

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

from datetime import date
import hisepy
import os
import pandas as pd
import re
import scanpy as sc

In [2]:

# Directory that receives the clustered .h5ad files written by this notebook.
out_dir = 'output'
# exist_ok makes this cell safe to re-run when the directory is already there.
os.makedirs(out_dir, exist_ok = True)

Helper functions¶

In [3]:

def cache_uuid_path(uuid):
    """
    Return the local path of the cached file for a HISE file UUID.

    The cache directory is '/home/jupyter/cache/<uuid>'. When that directory
    does not exist yet, hisepy.reader.cache_files() is called for the UUID
    first (presumably this downloads the file into the cache directory --
    the run output shows a "downloading fileID" message at that point).

    Parameters
    ----------
    uuid : str
        HISE file UUID.

    Returns
    -------
    str
        '<cache directory>/<first entry listed in that directory>'.

    Raises
    ------
    FileNotFoundError
        If the cache directory exists but contains no files.
    """
    cache_path = '/home/jupyter/cache/{u}'.format(u = uuid)
    if not os.path.isdir(cache_path):
        # The return value of cache_files() is not needed; only the files
        # it places on disk are used below.
        hisepy.reader.cache_files([uuid])
    filenames = os.listdir(cache_path)
    if not filenames:
        # Fail with a message naming the directory instead of a bare
        # IndexError from indexing an empty listing.
        raise FileNotFoundError(
            'No cached file found in {p}'.format(p = cache_path)
        )
    cache_file = '{p}/{f}'.format(p = cache_path, f = filenames[0])
    return cache_file

In [4]:

def read_csv_uuid(uuid):
    """
    Read the cached CSV file for a HISE file UUID.

    The file path is resolved with cache_uuid_path() and parsed with
    pandas.read_csv() using its default options.

    Parameters
    ----------
    uuid : str
        HISE file UUID.

    Returns
    -------
    pandas.DataFrame
    """
    csv_path = cache_uuid_path(uuid)
    return pd.read_csv(csv_path)

In [5]:

def read_adata_uuid(uuid):
    """
    Read the cached .h5ad file for a HISE file UUID.

    The file path is resolved with cache_uuid_path() and loaded with
    scanpy.read_h5ad().

    Parameters
    ----------
    uuid : str
        HISE file UUID.

    Returns
    -------
    The object returned by sc.read_h5ad() for the cached file.
    """
    h5ad_path = cache_uuid_path(uuid)
    return sc.read_h5ad(h5ad_path)

In [6]:

def rm_cache_uuid(uuid):
    """
    Delete the local cache directory for a HISE file UUID.

    Removes '/home/jupyter/cache/<uuid>' and everything below it. The
    removal is best-effort: a missing directory or a failed delete is
    ignored rather than raised.

    The path is deleted with shutil.rmtree() instead of a shell command
    built by string formatting, so the UUID is never interpreted by a
    shell (spaces or shell metacharacters cannot alter what is removed).

    Parameters
    ----------
    uuid : str
        HISE file UUID.

    Returns
    -------
    None
    """
    import shutil
    cache_path = '/home/jupyter/cache/{u}'.format(u = uuid)
    shutil.rmtree(cache_path, ignore_errors = True)

In [7]:

def format_cell_type(cell_type):
    """
    Convert a cell type label into the form used in file names.

    Every '+' becomes 'pos', every '-' becomes 'neg', and every space
    becomes '_', applied in that order.

    Parameters
    ----------
    cell_type : str
        Cell type label, e.g. 'ISG+ CD14 monocyte'.

    Returns
    -------
    str
        File-name form of the label, e.g. 'ISGpos_CD14_monocyte'.
    """
    # (regex pattern, replacement) pairs, applied top to bottom.
    substitutions = (
        ('\\+', 'pos'),
        ('-', 'neg'),
        (' ', '_'),
    )
    formatted = cell_type
    for pattern, replacement in substitutions:
        formatted = re.sub(pattern, replacement, formatted)
    return formatted

In [8]:

def element_id(n = 3):
    """
    Build a random identifier from chemical element names.

    Draws n atomic numbers uniformly from 1 to 118 (inclusive) and joins
    the corresponding element names with '-', e.g.
    'krypton-erbium-lutetium'. The same element may appear more than once.

    Parameters
    ----------
    n : int
        Number of element names in the identifier (default 3). With n = 0
        the result is an empty string.

    Returns
    -------
    str
        '-'-joined element names.
    """
    import periodictable
    from random import randrange
    # randrange excludes its upper bound, so (1, 119) covers atomic numbers
    # 1..118. Starting at 1 skips index 0 of periodictable.elements, which
    # is the neutron rather than a chemical element.
    rand_el = [
        periodictable.elements[randrange(1, 119)].name
        for _ in range(n)
    ]
    return '-'.join(rand_el)

In [9]:

def process_adata(adata, resolution = 2):
    """
    Cluster and embed one AnnData object, then return it with all genes
    normalized and log-transformed.

    Steps, in order:
      1. Rebuild the object from adata.raw and store that as the new .raw.
      2. Normalize totals and log1p-transform.
      3. Select highly variable genes and subset to them.
      4. Scale, run PCA (arpack solver), build a neighbor graph
         (50 neighbors, 30 PCs), run Leiden clustering, and run UMAP
         (min_dist 0.05).
      5. Rebuild the object from .raw again, store it as .raw, and repeat
         normalization and log1p on it.

    Parameters
    ----------
    adata :
        Object read by scanpy; its .raw attribute must be set, because the
        first step calls adata.raw.to_adata().
    resolution : int or float
        Leiden resolution (default 2). It is also used to name the result
        key: 'leiden_<resolution>'.

    Returns
    -------
    The object rebuilt from .raw in step 5, after normalize_total and log1p.
    NOTE(review): presumably this still carries the Leiden labels and UMAP
    coordinates computed in step 4 (i.e. raw.to_adata() keeps the
    per-cell annotations) -- confirm against the anndata documentation.
    """
    
    # Start from the contents of .raw and keep them as .raw of the new object,
    # so the data as stored in .raw can be recovered after the steps below.
    adata = adata.raw.to_adata()
    adata.raw = adata

    print('Normalizing', end = "; ")
    # Normalize and log transform
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)

    print('Finding HVGs', end = "; ")
    # Restrict downstream steps to variable genes
    sc.pp.highly_variable_genes(adata)
    # .copy() makes the subset an independent object instead of a view
    adata = adata[:, adata.var_names[adata.var['highly_variable']]].copy()

    print('Scaling', end = "; ")
    # Scale variable genes
    sc.pp.scale(adata)

    print('PCA', end = "; ")
    # Run PCA
    sc.tl.pca(adata, svd_solver = 'arpack')
    
    print('Neighbors', end = "; ")
    # Find nearest neighbors
    sc.pp.neighbors(
        adata, 
        n_neighbors = 50,
        n_pcs = 30
    )

    print('Leiden', end = "; ")
    # Find clusters; the result key encodes the resolution, e.g. 'leiden_2'
    sc.tl.leiden(
        adata, 
        resolution = resolution, 
        key_added = 'leiden_{r}'.format(r = resolution),
        n_iterations = 2
    )

    print('UMAP', end = "; ")
    # Run UMAP
    sc.tl.umap(adata, min_dist = 0.05)
    
    print('Renormalizing')
    # Go back to the full gene set held in .raw (the object above only has
    # the highly variable genes) and keep it as .raw of the returned object.
    adata = adata.raw.to_adata()
    adata.raw = adata

    # Normalize and log transform
    # NOTE(review): the run output shows scanpy's "adata.X seems to be already
    # log-transformed" warning after this step. Presumably it is triggered by
    # metadata left over from the first log1p call rather than by X itself,
    # since X was just rebuilt from .raw -- confirm.
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)
    
    return adata

Read cell type hierarchy¶

In [10]:

# HISE file UUID of the cell type hierarchy table (a CSV file).
hierarchy_uuid = '1a44252c-8cab-4c8f-92c9-d8f3af633790'
# Fetched through the local cache; columns used below are AIFI_L1 and AIFI_L3.
hierarchy_df = read_csv_uuid(hierarchy_uuid)

downloading fileID: 1a44252c-8cab-4c8f-92c9-d8f3af633790
Files have been successfully downloaded!

In [11]:

# List the distinct L1 labels so the values chosen for l1_types can be checked.
hierarchy_df['AIFI_L1'].unique()

Out[11]:

array(['B cell', 'DC', 'Erythrocyte', 'ILC', 'Monocyte', 'NK cell',
       'Platelet', 'Progenitor cell', 'T cell'], dtype=object)

Identify files for use in HISE¶

In [12]:

# Identifier used below to find this notebook's input files by name in the
# project store (it appears as the folder prefix of the matching file names).
search_id = 'nitrogen-rhenium-hafnium'
# L1 labels whose L3 subtypes are processed in this notebook.
l1_types = ['DC','Monocyte']

Get L3 cell types in the format used for filenames

In [13]:

# L3 labels of every hierarchy row whose L1 label is one of l1_types,
# as a plain Python list (row order preserved).
l3_types = hierarchy_df.loc[
    hierarchy_df['AIFI_L1'].isin(l1_types),
    'AIFI_L3'
].tolist()

In [14]:

# File-name form of each L3 label (see format_cell_type), in the same order.
l3_file_types = list(map(format_cell_type, l3_types))

Retrieve files stored in our HISE project store

In [15]:

# Listing of the 'cohorts' project store, reduced to the two columns used
# below: the file UUID ('id') and the file name ('name').
ps_df = hisepy.list_files_in_project_store('cohorts')[['id', 'name']]

Filter for files from the previous notebook using our search_id

In [16]:

# Keep only files whose name contains search_id.
# regex = False matches search_id as literal text rather than as a regular
# expression; na = False treats missing names as non-matching so the boolean
# mask never contains NaN.
search_df = ps_df[ps_df['name'].str.contains(search_id, regex = False, na = False)]

Filter for files whose names contain any of the L3 cell types (l3_file_types) that belong to the selected L1 cell types

In [17]:

# Build one regular expression that matches any of the formatted L3 names.
# Each name is escaped so characters with a regex meaning (e.g. '.', '(',
# ')') are matched literally instead of changing the pattern.
type_string = '|'.join(re.escape(ct) for ct in l3_file_types)
# na = False treats missing names as non-matching so the mask is purely boolean.
type_df = search_df[search_df['name'].str.contains(type_string, na = False)]

In [18]:

# Show the selected file names to confirm that the expected L3 types matched.
type_df['name'].tolist()

Out[18]:

['nitrogen-rhenium-hafnium/diha_celltypist_L3_Intermediate_monocyte.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_HLAnegDRhi_cDC2.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_ISGpos_CD14_monocyte.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_Core_CD14_monocyte.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_ISGpos_CD16_monocyte.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_cDC1.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_ISGpos_cDC2.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_pDC.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_C1Qpos_CD16_monocyte.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_Core_CD16_monocyte.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_IL1Bpos_CD14_monocyte.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_CD14pos_cDC2.h5ad',
 'nitrogen-rhenium-hafnium/diha_celltypist_L3_ASDC.h5ad']

Process data for each L3 type¶

In [19]:

# Cluster the file for each selected L3 type and collect the output paths
# that are uploaded later in the notebook.
out_files = []
# Take the date once so every file written by this run carries the same
# stamp, even if the loop runs past midnight.
run_date = date.today()
for uuid in type_df['id']:
    adata = read_adata_uuid(uuid)

    # The label of the first cell names the output file (presumably each
    # input file holds a single L3 type -- confirm). .iloc[0] selects by
    # position; plain [0] on a label-indexed Series is deprecated in pandas.
    cell_type = adata.obs['AIFI_L3'].iloc[0]
    out_type = format_cell_type(cell_type)

    out_file = '{o}/diha_clustered_celltypist_L3_{ct}_{d}.h5ad'.format(o = out_dir, ct = out_type, d = run_date)
    if os.path.isfile(out_file):
        # An output from an earlier run on the same date is reused as-is.
        print('Previously processed {ct}; Skipping.'.format(ct = out_type))
    else:
        adata = process_adata(adata, resolution = 2)
        adata.write_h5ad(out_file)

    # Record the path exactly once per input, whether it was just written
    # or already existed, so the upload list has no duplicates.
    out_files.append(out_file)

    # Free local disk space before the next file is fetched.
    rm_cache_uuid(uuid)

downloading fileID: ec499b80-60c3-4af6-95cc-10e6163ca6ed
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: d02ec3b0-7b46-4834-8f9a-a565b267fb66
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 990639d0-c033-4766-9515-55c2abbffe75
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 7433c434-75d9-4577-8d43-c8374a679e33
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 39fa308f-dfdc-4c7c-976c-1aa96a09c3f5
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 87e0740a-2ced-429d-aa79-3ca89bf23922
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 6fecb37b-d274-4f8a-9dfa-2ee770c2012a
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 3e1e82a2-af53-43fe-8d28-19d71f3c33a6
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: baee6394-b320-4854-ad43-893e0b1af4b7
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 99bd42e2-4ad9-4b0b-a99b-095d2a59aec2
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 14d8e373-f825-4027-8b06-263b3dfa606a
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: d9c92fe2-5e6e-460c-acb9-8524375338c3
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.
downloading fileID: 25c55728-622b-4242-8984-6f38ce5cf5eb
Files have been successfully downloaded!
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing
WARNING: adata.X seems to be already log-transformed.

Upload assembled results to HISE¶

In [20]:

# Study space that receives the upload.
study_space_uuid = 'de025812-5e73-4b3c-9c3b-6d0eac412f2a'
# Upload title, suffixed with today's date in ISO form (YYYY-MM-DD).
title = 'DIHA CellTypist L3 Myeloid cells Clustered ' + str(date.today())

In [21]:

# NOTE: search_id is reassigned here. Above, it held the identifier used to
# find this notebook's input files; from this point on it holds a new random
# identifier that is passed as the upload destination. Re-running the earlier
# filtering cells after this one would therefore search with the new value.
search_id = element_id()
search_id

Out[21]:

'krypton-erbium-lutetium'

In [22]:

# UUIDs of the input files, passed to the upload as input_file_ids.
in_files = type_df['id'].tolist()
in_files

Out[22]:

['ec499b80-60c3-4af6-95cc-10e6163ca6ed',
 'd02ec3b0-7b46-4834-8f9a-a565b267fb66',
 '990639d0-c033-4766-9515-55c2abbffe75',
 '7433c434-75d9-4577-8d43-c8374a679e33',
 '39fa308f-dfdc-4c7c-976c-1aa96a09c3f5',
 '87e0740a-2ced-429d-aa79-3ca89bf23922',
 '6fecb37b-d274-4f8a-9dfa-2ee770c2012a',
 '3e1e82a2-af53-43fe-8d28-19d71f3c33a6',
 'baee6394-b320-4854-ad43-893e0b1af4b7',
 '99bd42e2-4ad9-4b0b-a99b-095d2a59aec2',
 '14d8e373-f825-4027-8b06-263b3dfa606a',
 'd9c92fe2-5e6e-460c-acb9-8524375338c3',
 '25c55728-622b-4242-8984-6f38ce5cf5eb']

In [23]:

# Review the local files that are about to be uploaded.
out_files

Out[23]:

['output/diha_clustered_celltypist_L3_Intermediate_monocyte_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_HLAnegDRhi_cDC2_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_ISGpos_CD14_monocyte_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_Core_CD14_monocyte_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_ISGpos_CD16_monocyte_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_cDC1_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_ISGpos_cDC2_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_pDC_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_C1Qpos_CD16_monocyte_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_Core_CD16_monocyte_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_IL1Bpos_CD14_monocyte_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_CD14pos_cDC2_2024-04-21.h5ad',
 'output/diha_clustered_celltypist_L3_ASDC_2024-04-21.h5ad']

In [24]:

# Upload the clustered files to the study space, recording the input file
# UUIDs alongside them. The destination is the random identifier generated
# above (search_id).
# NOTE(review): the run output shows a "Do you truly want to proceed?" prompt,
# so this call presumably waits for interactive confirmation -- confirm.
hisepy.upload.upload_files(
    files = out_files,
    study_space_id = study_space_uuid,
    title = title,
    input_file_ids = in_files,
    destination = search_id
)

you are trying to upload file_ids... ['output/diha_clustered_celltypist_L3_Intermediate_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_HLAnegDRhi_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_Core_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_cDC1_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_pDC_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_C1Qpos_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_Core_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_IL1Bpos_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_CD14pos_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ASDC_2024-04-21.h5ad']. Do you truly want to proceed?

Out[24]:

{'trace_id': '4a3d41c5-ce95-46eb-9659-4973e59a2182',
 'files': ['output/diha_clustered_celltypist_L3_Intermediate_monocyte_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_HLAnegDRhi_cDC2_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_ISGpos_CD14_monocyte_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_Core_CD14_monocyte_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_ISGpos_CD16_monocyte_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_cDC1_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_ISGpos_cDC2_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_pDC_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_C1Qpos_CD16_monocyte_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_Core_CD16_monocyte_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_IL1Bpos_CD14_monocyte_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_CD14pos_cDC2_2024-04-21.h5ad',
  'output/diha_clustered_celltypist_L3_ASDC_2024-04-21.h5ad']}

In [25]:

# Record the package versions used for this run alongside the results.
import session_info
session_info.show()

Out[25]:

Click to view session information

-----
anndata             0.10.3
hisepy              0.3.0
pandas              2.1.4
scanpy              1.9.6
session_info        1.0.0
-----

Click to view modules imported as dependencies

PIL                         10.0.1
anyio                       NA
arrow                       1.3.0
asttokens                   NA
attr                        23.2.0
attrs                       23.2.0
babel                       2.14.0
beatrix_jupyterlab          NA
brotli                      NA
cachetools                  5.3.1
certifi                     2024.02.02
cffi                        1.16.0
charset_normalizer          3.3.2
cloudpickle                 2.2.1
colorama                    0.4.6
comm                        0.1.4
cryptography                41.0.7
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
db_dtypes                   1.1.1
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
deprecated                  1.2.14
exceptiongroup              1.2.0
executing                   2.0.1
fastjsonschema              NA
fqdn                        NA
google                      NA
greenlet                    2.0.2
grpc                        1.58.0
grpc_status                 NA
h5py                        3.10.0
idna                        3.6
igraph                      0.10.8
importlib_metadata          NA
ipykernel                   6.28.0
ipython_genutils            0.2.0
ipywidgets                  8.1.1
isoduration                 NA
jedi                        0.19.1
jinja2                      3.1.2
joblib                      1.3.2
json5                       NA
jsonpointer                 2.4
jsonschema                  4.20.0
jsonschema_specifications   NA
jupyter_events              0.9.0
jupyter_server              2.12.1
jupyterlab_server           2.25.2
jwt                         2.8.0
kiwisolver                  1.4.5
leidenalg                   0.10.1
llvmlite                    0.41.0
lz4                         4.3.2
markupsafe                  2.1.3
matplotlib                  3.8.0
matplotlib_inline           0.1.6
mpl_toolkits                NA
mpmath                      1.3.0
natsort                     8.4.0
nbformat                    5.9.2
numba                       0.58.0
numpy                       1.24.0
opentelemetry               NA
overrides                   NA
packaging                   23.2
parso                       0.8.3
periodictable               1.5.2
pexpect                     4.8.0
pickleshare                 0.7.5
pkg_resources               NA
platformdirs                4.1.0
plotly                      5.18.0
prettytable                 3.9.0
prometheus_client           NA
prompt_toolkit              3.0.42
proto                       NA
psutil                      NA
ptyprocess                  0.7.0
pure_eval                   0.2.2
pyarrow                     13.0.0
pycparser                   2.21
pydev_ipython               NA
pydevconsole                NA
pydevd                      2.9.5
pydevd_file_utils           NA
pydevd_plugins              NA
pydevd_tracing              NA
pygments                    2.17.2
pynndescent                 0.5.11
pynvml                      NA
pyparsing                   3.1.1
pyreadr                     0.5.0
pythonjsonlogger            NA
pytz                        2023.3.post1
referencing                 NA
requests                    2.31.0
rfc3339_validator           0.1.4
rfc3986_validator           0.1.1
rpds                        NA
scipy                       1.11.4
send2trash                  NA
shapely                     1.8.5.post1
six                         1.16.0
sklearn                     1.3.2
sniffio                     1.3.0
socks                       1.7.1
sparse                      0.14.0
sql                         NA
sqlalchemy                  2.0.21
sqlparse                    0.4.4
stack_data                  0.6.2
sympy                       1.12
termcolor                   NA
texttable                   1.7.0
threadpoolctl               3.2.0
torch                       2.1.2+cu121
torchgen                    NA
tornado                     6.3.3
tqdm                        4.66.1
traitlets                   5.9.0
typing_extensions           NA
umap                        0.5.5
uri_template                NA
urllib3                     1.26.18
wcwidth                     0.2.12
webcolors                   1.13
websocket                   1.7.0
wrapt                       1.15.0
xarray                      2023.12.0
yaml                        6.0.1
zipp                        NA
zmq                         25.1.2
zoneinfo                    NA
zstandard                   0.22.0

-----
IPython             8.19.0
jupyter_client      8.6.0
jupyter_core        5.6.1
jupyterlab          4.1.5
notebook            6.5.4
-----
Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]
Linux-5.15.0-1058-gcp-x86_64-with-glibc2.31
-----
Session information updated at 2024-04-21 20:07

In [ ]: