import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
from datetime import date
import hisepy
import os
import pandas as pd
import re
import scanpy as sc
# Directory that receives the clustered .h5ad files written by this notebook.
out_dir = 'output'
# exist_ok = True replaces the separate isdir() check: it cannot race with
# another process creating the directory and keeps re-running the cell safe.
os.makedirs(out_dir, exist_ok = True)
def cache_uuid_path(uuid, cache_dir = '/home/jupyter/cache'):
    """Return the path of the cached file that belongs to a HISE file UUID.

    If ``<cache_dir>/<uuid>`` does not exist yet, the file is requested through
    ``hisepy.reader.cache_files`` first.

    Parameters
    ----------
    uuid : str
        HISE file identifier; also the name of the cache sub-directory.
    cache_dir : str, optional
        Root of the local cache. The default is the location this notebook has
        always used. NOTE(review): hisepy decides where downloads are written,
        so only override this when the files are already present there.

    Returns
    -------
    str
        ``<cache_dir>/<uuid>/<filename>``. When the directory holds more than
        one entry, the alphabetically first one is returned.

    Raises
    ------
    FileNotFoundError
        If the cache directory exists but contains no file.
    """
    cache_path = os.path.join(cache_dir, uuid)
    if not os.path.isdir(cache_path):
        # Not cached yet: ask HISE to fetch it (the return value is not needed).
        hisepy.reader.cache_files([uuid])
    # os.listdir() returns entries in arbitrary order; sort so that the same
    # file is picked on every call.
    filenames = sorted(os.listdir(cache_path))
    if not filenames:
        raise FileNotFoundError(
            'No cached file found in {p}'.format(p = cache_path)
        )
    return os.path.join(cache_path, filenames[0])
def read_csv_uuid(uuid):
    """Read the cached CSV file of a HISE UUID into a pandas DataFrame.

    The file location is resolved by ``cache_uuid_path`` and the file is parsed
    with ``pd.read_csv`` using its default options.
    """
    return pd.read_csv(cache_uuid_path(uuid))
def read_adata_uuid(uuid):
    """Read the cached .h5ad file of a HISE UUID with ``sc.read_h5ad``.

    The file location is resolved by ``cache_uuid_path``; the object returned
    by scanpy is handed back unchanged.
    """
    return sc.read_h5ad(cache_uuid_path(uuid))
def rm_cache_uuid(uuid, cache_dir = '/home/jupyter/cache'):
    """Delete the cache directory that belongs to a HISE file UUID.

    Removal is best effort, as it was with the previous ``rm -r`` shell call:
    a directory that is already gone is ignored, and any other failure is
    reported as a warning instead of aborting the caller.

    Parameters
    ----------
    uuid : str
        HISE file identifier; the directory ``<cache_dir>/<uuid>`` is removed.
    cache_dir : str, optional
        Root of the local cache; defaults to the location used by this
        notebook.

    Raises
    ------
    ValueError
        If ``uuid`` is empty, because the resulting path would be the cache
        root itself.
    """
    # Local import: nothing else in this file needs shutil.
    import shutil

    if not uuid:
        raise ValueError('uuid must be a non-empty string')
    cache_path = os.path.join(cache_dir, uuid)
    try:
        # No shell is involved, so characters in uuid cannot be interpreted
        # as shell syntax.
        shutil.rmtree(cache_path)
    except FileNotFoundError:
        # Nothing cached for this uuid; the goal is already met.
        pass
    except OSError as err:
        warnings.warn(
            'Could not remove cache directory {d}: {e}'.format(d = cache_path, e = err)
        )
def format_cell_type(cell_type):
    """Convert a cell type label into the form used in file names.

    ``+`` becomes ``pos``, ``-`` becomes ``neg`` and a space becomes ``_``;
    every other character is kept.

    Parameters
    ----------
    cell_type : str
        Cell type label, e.g. ``'ISG+ CD14 monocyte'``.

    Returns
    -------
    str
        File-name form of the label, e.g. ``'ISGpos_CD14_monocyte'``.
    """
    # All three substitutions are literal single characters, so one
    # translation pass replaces the three regex substitutions. None of the
    # replacement texts contains '+', '-' or ' ', so a single pass gives the
    # same result as applying them one after another.
    replacements = str.maketrans({'+': 'pos', '-': 'neg', ' ': '_'})
    return cell_type.translate(replacements)
def element_id(n = 3):
    """Build a random identifier from chemical element names.

    Parameters
    ----------
    n : int, optional
        Number of element names to draw (with replacement). Default 3.

    Returns
    -------
    str
        The drawn names joined by ``-``, e.g. ``'krypton-erbium-lutetium'``;
        an empty string when ``n`` is 0.
    """
    import periodictable
    from random import randrange

    # Draw atomic numbers 1..118. The previous range(0, 118) included index 0
    # and could never produce element 118.
    # NOTE(review): index 0 of the periodictable table is understood to be the
    # neutron entry rather than an element - confirm against the installed
    # periodictable version.
    names = [periodictable.elements[randrange(1, 119)].name for _ in range(n)]
    return '-'.join(names)
def _adata_from_raw(adata):
    """Rebuild an AnnData from ``adata.raw`` and attach an independent raw copy to it."""
    restored = adata.raw.to_adata()
    # Attach an explicit copy instead of the object itself, so the stored raw
    # matrix can never be the same array as ``restored.X``.
    # NOTE(review): normalize_total / log1p may modify X in place; if raw
    # shared that array the "raw" data would silently change as well.
    restored.raw = restored.copy()
    return restored


def _normalize_and_log(adata):
    """Apply scanpy total-count normalization followed by log1p, in place."""
    sc.pp.normalize_total(adata)
    sc.pp.log1p(adata)


def process_adata(adata, resolution = 2, n_neighbors = 50, n_pcs = 30, min_dist = 0.05):
    """Cluster and embed an AnnData object, then return it rebuilt from raw.

    Steps, in order: rebuild from ``adata.raw``; normalize and log transform;
    select highly variable genes; scale; PCA; nearest neighbors; Leiden
    clustering; UMAP. Finally the object is rebuilt from raw once more and
    normalized / log transformed, so the returned X is not limited to the
    variable genes. Progress is printed after each step.

    Parameters
    ----------
    adata : AnnData
        Input object; ``adata.raw`` must be set because processing starts
        from it.
    resolution : int or float, optional
        Leiden resolution. The cluster labels are stored under the key
        ``'leiden_<resolution>'``. Default 2.
    n_neighbors : int, optional
        Neighbors passed to ``sc.pp.neighbors``. Default 50.
    n_pcs : int, optional
        Principal components passed to ``sc.pp.neighbors``. Default 30.
    min_dist : float, optional
        ``min_dist`` passed to ``sc.tl.umap``. Default 0.05.

    Returns
    -------
    AnnData
        Object rebuilt from raw after clustering, normalized and log
        transformed. NOTE(review): the Leiden labels and the UMAP embedding
        are expected to be carried over by ``raw.to_adata()`` - confirm
        against the anndata version in use.

    Raises
    ------
    ValueError
        If ``adata.raw`` is not set.
    """
    if adata.raw is None:
        raise ValueError('process_adata requires adata.raw to be set')

    # Keep a copy of the raw data
    adata = _adata_from_raw(adata)

    print('Normalizing', end = "; ")
    # Normalize and log transform
    _normalize_and_log(adata)

    print('Finding HVGs', end = "; ")
    # Restrict downstream steps to variable genes
    sc.pp.highly_variable_genes(adata)
    adata = adata[:, adata.var_names[adata.var['highly_variable']]].copy()

    print('Scaling', end = "; ")
    # Scale variable genes
    sc.pp.scale(adata)

    print('PCA', end = "; ")
    # Run PCA
    sc.tl.pca(adata, svd_solver = 'arpack')

    print('Neighbors', end = "; ")
    # Find nearest neighbors
    sc.pp.neighbors(
        adata,
        n_neighbors = n_neighbors,
        n_pcs = n_pcs
    )

    print('Leiden', end = "; ")
    # Find clusters
    sc.tl.leiden(
        adata,
        resolution = resolution,
        key_added = 'leiden_{r}'.format(r = resolution),
        n_iterations = 2
    )

    print('UMAP', end = "; ")
    # Run UMAP
    sc.tl.umap(adata, min_dist = min_dist)

    print('Renormalizing')
    # Go back to the full gene set stored in raw.
    adata = _adata_from_raw(adata)
    # uns is carried over from the object that was log transformed above, so
    # its 'log1p' entry no longer describes the restored X. Dropping it avoids
    # the misleading "already log-transformed" warning; log1p sets it again.
    adata.uns.pop('log1p', None)
    # Normalize and log transform
    _normalize_and_log(adata)

    return adata
# HISE file UUID of the cell type hierarchy table; it is read as CSV and
# provides the AIFI_L1 and AIFI_L3 columns used below.
hierarchy_uuid = '1a44252c-8cab-4c8f-92c9-d8f3af633790'
hierarchy_df = read_csv_uuid(hierarchy_uuid)
downloading fileID: 1a44252c-8cab-4c8f-92c9-d8f3af633790 Files have been successfully downloaded!
# Display the distinct L1 labels available for selection in l1_types.
hierarchy_df['AIFI_L1'].unique()
array(['B cell', 'DC', 'Erythrocyte', 'ILC', 'Monocyte', 'NK cell', 'Platelet', 'Progenitor cell', 'T cell'], dtype=object)
# Substring used below to pick this project's input files out of the
# project store listing (it appears as the folder part of their names).
search_id = 'nitrogen-rhenium-hafnium'
# L1 cell types whose L3 subtypes are processed in this notebook.
l1_types = ['DC','Monocyte']
Get L3 cell types in the format used for filenames
# Collect the L3 labels whose L1 parent is listed in l1_types, then convert
# each label to its file-name form.
l3_types = hierarchy_df.loc[hierarchy_df['AIFI_L1'].isin(l1_types), 'AIFI_L3'].tolist()
l3_file_types = list(map(format_cell_type, l3_types))
Retrieve files stored in our HISE project store
# List the files of the 'cohorts' project store; only the file id and the
# file name are needed further down.
ps_df = hisepy.list_files_in_project_store('cohorts')[['id', 'name']]
Filter for files from the previous notebook using our search_id
# search_id is a plain identifier, so it is matched literally instead of
# being compiled as a regular expression. na = False treats entries without
# a name as non-matching, so the mask is always boolean.
search_df = ps_df[ps_df['name'].str.contains(search_id, regex = False, na = False)]
Filter for cells related to the L1 cell type based on l3_types
# Build one alternation pattern from the file-name forms of the L3 types.
# Each name is escaped so that characters with a regex meaning in a label
# cannot change what is matched.
type_string = '|'.join(re.escape(file_type) for file_type in l3_file_types)
# na = False treats entries without a name as non-matching.
type_df = search_df[search_df['name'].str.contains(type_string, na = False)]
# Display the selected file names.
type_df['name'].tolist()
['nitrogen-rhenium-hafnium/diha_celltypist_L3_Intermediate_monocyte.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_HLAnegDRhi_cDC2.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_ISGpos_CD14_monocyte.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_Core_CD14_monocyte.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_ISGpos_CD16_monocyte.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_cDC1.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_ISGpos_cDC2.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_pDC.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_C1Qpos_CD16_monocyte.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_Core_CD16_monocyte.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_IL1Bpos_CD14_monocyte.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_CD14pos_cDC2.h5ad', 'nitrogen-rhenium-hafnium/diha_celltypist_L3_ASDC.h5ad']
# Cluster every selected input file and collect the output paths for upload.
out_files = []
# Evaluate the date once so all files of one run carry the same date stamp,
# even if the run crosses midnight.
run_date = date.today()
for uuid in type_df['id']:
    adata = read_adata_uuid(uuid)
    # Take the label of the first cell. .iloc selects by position; plain [0]
    # on a label-indexed Series is deprecated positional access in pandas.
    cell_type = adata.obs['AIFI_L3'].iloc[0]
    out_type = format_cell_type(cell_type)
    out_file = os.path.join(
        out_dir,
        'diha_clustered_celltypist_L3_{ct}_{d}.h5ad'.format(ct = out_type, d = run_date)
    )
    if os.path.isfile(out_file):
        # A file written earlier today is reused instead of being recomputed.
        print('Previously processed {ct}; Skipping.'.format(ct = out_type))
    else:
        adata = process_adata(adata, resolution = 2)
        adata.write_h5ad(out_file)
    out_files.append(out_file)
    # Remove the cached input once it has been handled.
    rm_cache_uuid(uuid)
downloading fileID: ec499b80-60c3-4af6-95cc-10e6163ca6ed Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: d02ec3b0-7b46-4834-8f9a-a565b267fb66 Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 990639d0-c033-4766-9515-55c2abbffe75 Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 7433c434-75d9-4577-8d43-c8374a679e33 Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 39fa308f-dfdc-4c7c-976c-1aa96a09c3f5 Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 87e0740a-2ced-429d-aa79-3ca89bf23922 Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 6fecb37b-d274-4f8a-9dfa-2ee770c2012a Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 3e1e82a2-af53-43fe-8d28-19d71f3c33a6 Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: baee6394-b320-4854-ad43-893e0b1af4b7 Files have been successfully downloaded! 
Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 99bd42e2-4ad9-4b0b-a99b-095d2a59aec2 Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 14d8e373-f825-4027-8b06-263b3dfa606a Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: d9c92fe2-5e6e-460c-acb9-8524375338c3 Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed. downloading fileID: 25c55728-622b-4242-8984-6f38ce5cf5eb Files have been successfully downloaded! Normalizing; Finding HVGs; Scaling; PCA; Neighbors; Leiden; UMAP; Renormalizing WARNING: adata.X seems to be already log-transformed.
# Parameters for the upload call below.
study_space_uuid = 'de025812-5e73-4b3c-9c3b-6d0eac412f2a'
title = 'DIHA CellTypist L3 Myeloid cells Clustered {d}'.format(d = date.today())
# NOTE: search_id is rebound here. Above it held the identifier of the input
# files; from this point on it is a newly generated random identifier that is
# passed to the upload as its destination.
search_id = element_id()
search_id
'krypton-erbium-lutetium'
# File ids of all inputs read in the processing loop; passed to the upload
# as input_file_ids.
in_files = type_df['id'].tolist()
in_files
['ec499b80-60c3-4af6-95cc-10e6163ca6ed', 'd02ec3b0-7b46-4834-8f9a-a565b267fb66', '990639d0-c033-4766-9515-55c2abbffe75', '7433c434-75d9-4577-8d43-c8374a679e33', '39fa308f-dfdc-4c7c-976c-1aa96a09c3f5', '87e0740a-2ced-429d-aa79-3ca89bf23922', '6fecb37b-d274-4f8a-9dfa-2ee770c2012a', '3e1e82a2-af53-43fe-8d28-19d71f3c33a6', 'baee6394-b320-4854-ad43-893e0b1af4b7', '99bd42e2-4ad9-4b0b-a99b-095d2a59aec2', '14d8e373-f825-4027-8b06-263b3dfa606a', 'd9c92fe2-5e6e-460c-acb9-8524375338c3', '25c55728-622b-4242-8984-6f38ce5cf5eb']
# Display the output paths collected by the processing loop.
out_files
['output/diha_clustered_celltypist_L3_Intermediate_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_HLAnegDRhi_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_Core_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_cDC1_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_pDC_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_C1Qpos_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_Core_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_IL1Bpos_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_CD14pos_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ASDC_2024-04-21.h5ad']
# Upload the clustered files to the study space, recording in_files as their
# inputs and using the freshly generated search_id as destination.
hisepy.upload.upload_files(
files = out_files,
study_space_id = study_space_uuid,
title = title,
input_file_ids = in_files,
destination = search_id
)
you are trying to upload file_ids... ['output/diha_clustered_celltypist_L3_Intermediate_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_HLAnegDRhi_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_Core_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_cDC1_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_pDC_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_C1Qpos_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_Core_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_IL1Bpos_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_CD14pos_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ASDC_2024-04-21.h5ad']. Do you truly want to proceed?
{'trace_id': '4a3d41c5-ce95-46eb-9659-4973e59a2182', 'files': ['output/diha_clustered_celltypist_L3_Intermediate_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_HLAnegDRhi_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_Core_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_cDC1_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ISGpos_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_pDC_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_C1Qpos_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_Core_CD16_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_IL1Bpos_CD14_monocyte_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_CD14pos_cDC2_2024-04-21.h5ad', 'output/diha_clustered_celltypist_L3_ASDC_2024-04-21.h5ad']}
# Print the versions of the loaded packages, Python and the platform for
# this session.
import session_info
session_info.show()
----- anndata 0.10.3 hisepy 0.3.0 pandas 2.1.4 scanpy 1.9.6 session_info 1.0.0 -----
PIL 10.0.1 anyio NA arrow 1.3.0 asttokens NA attr 23.2.0 attrs 23.2.0 babel 2.14.0 beatrix_jupyterlab NA brotli NA cachetools 5.3.1 certifi 2024.02.02 cffi 1.16.0 charset_normalizer 3.3.2 cloudpickle 2.2.1 colorama 0.4.6 comm 0.1.4 cryptography 41.0.7 cycler 0.10.0 cython_runtime NA dateutil 2.8.2 db_dtypes 1.1.1 debugpy 1.8.0 decorator 5.1.1 defusedxml 0.7.1 deprecated 1.2.14 exceptiongroup 1.2.0 executing 2.0.1 fastjsonschema NA fqdn NA google NA greenlet 2.0.2 grpc 1.58.0 grpc_status NA h5py 3.10.0 idna 3.6 igraph 0.10.8 importlib_metadata NA ipykernel 6.28.0 ipython_genutils 0.2.0 ipywidgets 8.1.1 isoduration NA jedi 0.19.1 jinja2 3.1.2 joblib 1.3.2 json5 NA jsonpointer 2.4 jsonschema 4.20.0 jsonschema_specifications NA jupyter_events 0.9.0 jupyter_server 2.12.1 jupyterlab_server 2.25.2 jwt 2.8.0 kiwisolver 1.4.5 leidenalg 0.10.1 llvmlite 0.41.0 lz4 4.3.2 markupsafe 2.1.3 matplotlib 3.8.0 matplotlib_inline 0.1.6 mpl_toolkits NA mpmath 1.3.0 natsort 8.4.0 nbformat 5.9.2 numba 0.58.0 numpy 1.24.0 opentelemetry NA overrides NA packaging 23.2 parso 0.8.3 periodictable 1.5.2 pexpect 4.8.0 pickleshare 0.7.5 pkg_resources NA platformdirs 4.1.0 plotly 5.18.0 prettytable 3.9.0 prometheus_client NA prompt_toolkit 3.0.42 proto NA psutil NA ptyprocess 0.7.0 pure_eval 0.2.2 pyarrow 13.0.0 pycparser 2.21 pydev_ipython NA pydevconsole NA pydevd 2.9.5 pydevd_file_utils NA pydevd_plugins NA pydevd_tracing NA pygments 2.17.2 pynndescent 0.5.11 pynvml NA pyparsing 3.1.1 pyreadr 0.5.0 pythonjsonlogger NA pytz 2023.3.post1 referencing NA requests 2.31.0 rfc3339_validator 0.1.4 rfc3986_validator 0.1.1 rpds NA scipy 1.11.4 send2trash NA shapely 1.8.5.post1 six 1.16.0 sklearn 1.3.2 sniffio 1.3.0 socks 1.7.1 sparse 0.14.0 sql NA sqlalchemy 2.0.21 sqlparse 0.4.4 stack_data 0.6.2 sympy 1.12 termcolor NA texttable 1.7.0 threadpoolctl 3.2.0 torch 2.1.2+cu121 torchgen NA tornado 6.3.3 tqdm 4.66.1 traitlets 5.9.0 typing_extensions NA umap 0.5.5 uri_template NA urllib3 1.26.18 wcwidth 0.2.12 
webcolors 1.13 websocket 1.7.0 wrapt 1.15.0 xarray 2023.12.0 yaml 6.0.1 zipp NA zmq 25.1.2 zoneinfo NA zstandard 0.22.0
----- IPython 8.19.0 jupyter_client 8.6.0 jupyter_core 5.6.1 jupyterlab 4.1.5 notebook 6.5.4 ----- Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0] Linux-5.15.0-1058-gcp-x86_64-with-glibc2.31 ----- Session information updated at 2024-04-21 20:07