For visualizations, it's helpful to have an .h5ad file in which all genes are normalized in the main adata.X
matrix, rather than only the highly variable genes.
We'll do this for the main object as well as for the subset objects generated for each of the major cell classes.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
from datetime import date
import hisepy
import os
import scanpy as sc
def read_adata_uuid(h5ad_uuid):
    """Read the cached .h5ad file for a HISE file UUID.

    The file is looked up under ``/home/jupyter/cache/<h5ad_uuid>``. When
    that directory does not exist yet, ``hisepy.reader.cache_files`` is
    called for the UUID before the directory is read.

    Parameters
    ----------
    h5ad_uuid : str
        HISE file UUID of the .h5ad file to load.

    Returns
    -------
    The object returned by ``sc.read_h5ad`` for the cached file.

    Raises
    ------
    FileNotFoundError
        If the cache directory is missing or contains no regular file.
    """
    h5ad_path = '/home/jupyter/cache/{u}'.format(u=h5ad_uuid)
    if not os.path.isdir(h5ad_path):
        # The return value is not needed here; presumably the call populates
        # h5ad_path -- confirm against the hisepy documentation.
        hisepy.reader.cache_files([h5ad_uuid])

    # os.listdir() returns entries in arbitrary order, so sort them and keep
    # only regular files to make the selected file deterministic.
    cached_files = sorted(
        name for name in os.listdir(h5ad_path)
        if os.path.isfile(os.path.join(h5ad_path, name))
    )
    if not cached_files:
        raise FileNotFoundError(
            'No cached file found in {p}'.format(p=h5ad_path)
        )

    h5ad_file = os.path.join(h5ad_path, cached_files[0])
    return sc.read_h5ad(h5ad_file)
def normalize_raw_data(adata):
    """Rebuild an object from ``adata.raw`` and normalize it.

    A new object is created with ``adata.raw.to_adata()``, stored as its own
    ``.raw`` before any transformation, then passed through
    ``sc.pp.normalize_total`` (``target_sum=1e4``) and ``sc.pp.log1p``.

    Parameters
    ----------
    adata
        Object whose ``.raw`` attribute holds the data to normalize.

    Returns
    -------
    The new, normalized object; its ``.raw`` is set from the
    pre-normalization object.

    Raises
    ------
    ValueError
        If ``adata.raw`` is None.
    """
    if adata.raw is None:
        raise ValueError(
            'adata.raw is not set; cannot rebuild the object from raw data.'
        )

    full_adata = adata.raw.to_adata()
    # Set .raw before normalizing so the untransformed values stay available.
    full_adata.raw = full_adata
    sc.pp.normalize_total(full_adata, target_sum=1e4)
    # NOTE(review): when this notebook was run, log1p logged "adata.X seems to
    # be already log-transformed" for every object. Presumably that comes from
    # metadata carried over from the input -- confirm that adata.raw holds
    # un-logged values.
    sc.pp.log1p(full_adata)
    return full_adata
# Directory that receives the .h5ad files written by this notebook.
out_dir = 'output'
# exist_ok=True creates the directory only when it is missing, without a
# separate check-then-create step.
os.makedirs(out_dir, exist_ok=True)

# Paths of the files written below, collected for the upload step.
out_files = []
# Maps each cell-class label (used in the output file name) to the HISE UUID
# of the input .h5ad file for that class.
h5ad_uuids = {
    'all': '6e8972a5-9463-4230-84b4-a20de055b9c3',
    'b-cells': '3ba425f9-b8e0-4a03-ae69-bac3d35b00b3',
    'myeloid-cells': 'a366815b-8092-4a66-9afa-c4fd3834edc4',
    'nk-cells': '90ce9dff-28b0-4b12-abaa-deab19fb68c9',
    'other': '8f55628c-cc28-4011-abb2-e3e13dad2b49',
    't-cells': '546b8939-cb2a-4b28-bf99-898ee4c0217f'
}

# Compute the date stamp once so every output file carries the same date,
# even if the loop runs across midnight.
run_date = date.today()

for cell_class, uuid in h5ad_uuids.items():
    # Build the path from out_dir so the files land in the directory that
    # was created above.
    out_file = os.path.join(
        out_dir,
        'ref_clean_pbmc_{c}_labeled-all-genes_{d}.h5ad'.format(
            c=cell_class,
            d=run_date
        )
    )

    adata = read_adata_uuid(uuid)
    adata = normalize_raw_data(adata)
    adata.write_h5ad(out_file)
    out_files.append(out_file)
WARNING: adata.X seems to be already log-transformed. downloading fileID: 3ba425f9-b8e0-4a03-ae69-bac3d35b00b3 Files have been successfully downloaded! WARNING: adata.X seems to be already log-transformed. downloading fileID: a366815b-8092-4a66-9afa-c4fd3834edc4 Files have been successfully downloaded! WARNING: adata.X seems to be already log-transformed. downloading fileID: 90ce9dff-28b0-4b12-abaa-deab19fb68c9 Files have been successfully downloaded! WARNING: adata.X seems to be already log-transformed. downloading fileID: 8f55628c-cc28-4011-abb2-e3e13dad2b49 Files have been successfully downloaded! WARNING: adata.X seems to be already log-transformed. downloading fileID: 546b8939-cb2a-4b28-bf99-898ee4c0217f Files have been successfully downloaded! WARNING: adata.X seems to be already log-transformed.
Finally, we'll use hisepy.upload.upload_files()
to send a copy of our output to HISE to use for downstream analysis steps.
# Study space ID passed to the upload call.
study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'

# Upload title, stamped with today's date.
title = f'10x 3-prime PBMC Clean Reference with all genes {date.today()}'

# UUIDs of all input files, in the order they appear in h5ad_uuids.
in_files = [*h5ad_uuids.values()]
in_files
['6e8972a5-9463-4230-84b4-a20de055b9c3', '3ba425f9-b8e0-4a03-ae69-bac3d35b00b3', 'a366815b-8092-4a66-9afa-c4fd3834edc4', '90ce9dff-28b0-4b12-abaa-deab19fb68c9', '8f55628c-cc28-4011-abb2-e3e13dad2b49', '546b8939-cb2a-4b28-bf99-898ee4c0217f']
# Display the output file paths that are passed to the upload call.
out_files
['output/ref_clean_pbmc_all_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_b-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_myeloid-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_nk-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_other_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_t-cells_labeled-all-genes_2024-03-11.h5ad']
# Upload the output files to the study space, passing the input file UUIDs
# as input_file_ids.
hisepy.upload.upload_files(
    files=out_files,
    study_space_id=study_space_uuid,
    title=title,
    input_file_ids=in_files,
)
output/ref_clean_pbmc_all_labeled-all-genes_2024-03-11.h5ad output/ref_clean_pbmc_b-cells_labeled-all-genes_2024-03-11.h5ad output/ref_clean_pbmc_myeloid-cells_labeled-all-genes_2024-03-11.h5ad output/ref_clean_pbmc_nk-cells_labeled-all-genes_2024-03-11.h5ad output/ref_clean_pbmc_other_labeled-all-genes_2024-03-11.h5ad output/ref_clean_pbmc_t-cells_labeled-all-genes_2024-03-11.h5ad you are trying to upload file_ids... ['output/ref_clean_pbmc_all_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_b-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_myeloid-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_nk-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_other_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_t-cells_labeled-all-genes_2024-03-11.h5ad']. Do you truly want to proceed?
{'trace_id': '8a5c7691-5c1f-4eef-abf1-a1ca4872e6c3', 'files': ['output/ref_clean_pbmc_all_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_b-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_myeloid-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_nk-cells_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_other_labeled-all-genes_2024-03-11.h5ad', 'output/ref_clean_pbmc_t-cells_labeled-all-genes_2024-03-11.h5ad']}
# Print the package and interpreter versions for this session.
import session_info
session_info.show()
----- anndata 0.10.3 hisepy 0.3.0 scanpy 1.9.6 session_info 1.0.0 -----
PIL 10.0.1 anyio NA arrow 1.3.0 asttokens NA attr 23.2.0 attrs 23.2.0 babel 2.14.0 beatrix_jupyterlab NA brotli NA cachetools 5.3.1 certifi 2024.02.02 cffi 1.16.0 charset_normalizer 3.3.2 cloudpickle 2.2.1 colorama 0.4.6 comm 0.1.4 cryptography 41.0.7 cycler 0.10.0 cython_runtime NA dateutil 2.8.2 db_dtypes 1.1.1 debugpy 1.8.0 decorator 5.1.1 defusedxml 0.7.1 deprecated 1.2.14 exceptiongroup 1.2.0 executing 2.0.1 fastjsonschema NA fqdn NA google NA greenlet 2.0.2 grpc 1.58.0 grpc_status NA h5py 3.10.0 idna 3.6 igraph 0.10.8 importlib_metadata NA ipykernel 6.28.0 ipython_genutils 0.2.0 ipywidgets 8.1.1 isoduration NA jedi 0.19.1 jinja2 3.1.2 joblib 1.3.2 json5 NA jsonpointer 2.4 jsonschema 4.20.0 jsonschema_specifications NA jupyter_events 0.9.0 jupyter_server 2.12.1 jupyterlab_server 2.25.2 jwt 2.8.0 kiwisolver 1.4.5 leidenalg 0.10.1 llvmlite 0.41.0 lz4 4.3.2 markupsafe 2.1.3 matplotlib 3.8.0 matplotlib_inline 0.1.6 mpl_toolkits NA mpmath 1.3.0 natsort 8.4.0 nbformat 5.9.2 numba 0.58.0 numpy 1.24.0 opentelemetry NA overrides NA packaging 23.2 pandas 2.1.4 parso 0.8.3 pexpect 4.8.0 pickleshare 0.7.5 pkg_resources NA platformdirs 4.1.0 plotly 5.18.0 prettytable 3.9.0 prometheus_client NA prompt_toolkit 3.0.42 proto NA psutil NA ptyprocess 0.7.0 pure_eval 0.2.2 pyarrow 13.0.0 pydev_ipython NA pydevconsole NA pydevd 2.9.5 pydevd_file_utils NA pydevd_plugins NA pydevd_tracing NA pygments 2.17.2 pynvml NA pyparsing 3.1.1 pyreadr 0.5.0 pythonjsonlogger NA pytz 2023.3.post1 referencing NA requests 2.31.0 rfc3339_validator 0.1.4 rfc3986_validator 0.1.1 rpds NA scipy 1.11.4 send2trash NA shapely 1.8.5.post1 six 1.16.0 sklearn 1.3.2 sniffio 1.3.0 socks 1.7.1 sql NA sqlalchemy 2.0.21 sqlparse 0.4.4 stack_data 0.6.2 sympy 1.12 termcolor NA texttable 1.7.0 threadpoolctl 3.2.0 torch 2.1.2+cu121 torchgen NA tornado 6.3.3 tqdm 4.66.1 traitlets 5.9.0 typing_extensions NA uri_template NA urllib3 1.26.18 wcwidth 0.2.12 webcolors 1.13 websocket 1.7.0 wrapt 1.15.0 xarray 2023.12.0 yaml 
6.0.1 zipp NA zmq 25.1.2 zoneinfo NA
----- IPython 8.19.0 jupyter_client 8.6.0 jupyter_core 5.6.1 jupyterlab 4.1.2 notebook 6.5.4 ----- Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0] Linux-5.15.0-1053-gcp-x86_64-with-glibc2.31 ----- Session information updated at 2024-03-11 16:47