Assemble final labels¶

Here, we'll gather all of the cell barcode labels generated for the major classes of PBMCs and generate a single complete set of annotations. We can then join these to the original, full PBMC dataset to generate a final object with all cell type annotations.

In [1]:

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

from datetime import date
import hisepy
import os
import pandas as pd
import scanpy as sc

In [2]:

def read_adata_uuid(h5ad_uuid, cache_dir = '/home/jupyter/cache'):
    """Open a cached .h5ad file, identified by its HISE UUID, as a backed AnnData.

    The file is looked up in ``<cache_dir>/<h5ad_uuid>``. If that directory
    does not exist, ``hisepy.reader.cache_files()`` is called for the UUID
    before the lookup.

    Parameters
    ----------
    h5ad_uuid : str
        UUID of the .h5ad file.
    cache_dir : str, optional
        Directory that holds one sub-directory per cached UUID. The default
        is the location this notebook has always used.
        NOTE(review): hisepy presumably always downloads to the default
        location, so a custom ``cache_dir`` should already contain the file.

    Returns
    -------
    The object returned by ``sc.read_h5ad(..., backed = 'r')``.

    Raises
    ------
    FileNotFoundError
        If the UUID's cache directory is missing or contains no files.
    """
    h5ad_path = os.path.join(cache_dir, h5ad_uuid)
    if not os.path.isdir(h5ad_path):
        hisepy.reader.cache_files([h5ad_uuid])
    # Sort so the chosen file does not depend on filesystem listing order
    cached_files = sorted(os.listdir(h5ad_path))
    if not cached_files:
        raise FileNotFoundError(
            'No cached file found in {p} for UUID {u}'.format(p = h5ad_path, u = h5ad_uuid)
        )
    h5ad_file = os.path.join(h5ad_path, cached_files[0])
    return sc.read_h5ad(h5ad_file, backed = 'r')

In [3]:

def read_parquet_uuid(pq_uuid, cache_dir = '/home/jupyter/cache'):
    """Read a cached parquet file, identified by its HISE UUID, into a DataFrame.

    The file is looked up in ``<cache_dir>/<pq_uuid>``. If that directory
    does not exist, ``hisepy.reader.cache_files()`` is called for the UUID
    before the lookup.

    Parameters
    ----------
    pq_uuid : str
        UUID of the parquet file.
    cache_dir : str, optional
        Directory that holds one sub-directory per cached UUID. The default
        is the location this notebook has always used.
        NOTE(review): hisepy presumably always downloads to the default
        location, so a custom ``cache_dir`` should already contain the file.

    Returns
    -------
    pandas.DataFrame
        Contents of the parquet file.

    Raises
    ------
    FileNotFoundError
        If the UUID's cache directory is missing or contains no files.
    """
    pq_path = os.path.join(cache_dir, pq_uuid)
    if not os.path.isdir(pq_path):
        hisepy.reader.cache_files([pq_uuid])
    # Sort so the chosen file does not depend on filesystem listing order
    cached_files = sorted(os.listdir(pq_path))
    if not cached_files:
        raise FileNotFoundError(
            'No cached file found in {p} for UUID {u}'.format(p = pq_path, u = pq_uuid)
        )
    pq_file = os.path.join(pq_path, cached_files[0])
    return pd.read_parquet(pq_file)

Read barcode annotations¶

In [4]:

# HISE file UUIDs of the per-class barcode annotation tables.
# Each entry is read with read_parquet_uuid() and later listed as an upload input.
anno_uuids = [
    '6897e950-2719-4575-87d0-1d2307d5db63', # B cells
    'eff741c2-b0fe-4dff-9914-ff5dc89ee95e', # Myeloid cells
    'b57066f8-c0d5-4bfb-aa9b-79c743aa5886', # NK cells
    '09f10d41-3170-4317-ba84-650268fcfcd2', # Other cells
    '75cb6602-0179-4128-9171-83c269a2b340' # T cells
]

In [5]:

# Load one annotation table per UUID, in the same order as anno_uuids
anno_list = [read_parquet_uuid(anno_uuid) for anno_uuid in anno_uuids]

downloading fileID: 6897e950-2719-4575-87d0-1d2307d5db63
Files have been successfully downloaded!

In [6]:

# Stack the per-class annotation tables row-wise into a single frame
anno = pd.concat(anno_list)

In [7]:

# (rows, columns) of the combined annotation table
anno.shape

Out[7]:

(1952128, 4)

Read original clustered data¶

In [8]:

# HISE file UUID of the clustered dataset; read below with read_adata_uuid()
h5ad_uuid = '9db48bed-cd91-49ae-abd2-447ae478ca96'

In [9]:

# read_adata_uuid() opens the file with backed = 'r'
adata = read_adata_uuid(h5ad_uuid)

In [10]:

# (observations, variables); the observation count can be compared with anno.shape
adata.shape

Out[10]:

(1952128, 1236)

Add annotations to dataset¶

In [11]:

# Attach the cell type labels to the per-cell metadata, matching on barcode.
obs = adata.obs
# Drop the existing index so the join keys only on the 'barcodes' column
obs = obs.reset_index(drop = True)
# validate = 'many_to_one' makes pandas raise a MergeError if a barcode is
# repeated in anno, instead of silently duplicating metadata rows (which
# would no longer line up with the cells in adata).
obs = obs.merge(anno, on = 'barcodes', how = 'left', validate = 'many_to_one')
# Restore barcodes as the index while also keeping them as a column
obs = obs.set_index('barcodes', drop = False)

In [12]:

# Replace the dataset's per-cell metadata with the labeled version
adata.obs = obs

Preview annotations on UMAP projection¶

In [13]:

# One UMAP panel per annotation column, laid out in a single column (ncols = 1)
sc.pl.umap(
    adata,
    color = ['AIFI_L1', 'AIFI_L2', 'AIFI_L3'],
    ncols = 1
)

/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(

Output final annotations¶

In [14]:

# Take the labeled metadata and replace its index with a fresh positional one
obs = adata.obs.reset_index(drop = True)

In [15]:

# Append the UMAP coordinates to the metadata table as two columns.
umap_mat = adata.obsm['X_umap']
# Build the frame on obs.index so the column assignments below match row for
# row whatever index obs carries; with mismatched index labels, pandas would
# align by label and silently fill the new columns with NaN.
umap_df = pd.DataFrame(umap_mat, columns = ['umap_1', 'umap_2'], index = obs.index)
obs['umap_1'] = umap_df['umap_1']
obs['umap_2'] = umap_df['umap_2']

In [16]:

# Preview the first rows, including the label and UMAP columns added above
obs.head()

Out[16]:

	barcodes	batch_id	cell_name	cell_uuid	chip_id	hto_barcode	hto_category	n_genes	n_mito_umis	n_reads	...	pct_counts_in_top_500_genes	total_counts_mito	log1p_total_counts_mito	pct_counts_mito	leiden	AIFI_L1	AIFI_L2	AIFI_L3	umap_1	umap_2
0	cf71f47048b611ea8957bafe6d70929e	B001	weathered_pernicious_polliwog	cf71f47048b611ea8957bafe6d70929e	B001-P1C1	TGATGGCCTATTGGG	singlet	1081	115	9307	...	79.628331	115	4.753590	4.032258	13	T cell	MAIT	CD8 MAIT	0.069207	6.428511
1	cf71f54248b611ea8957bafe6d70929e	B001	untidy_emulsive_hamadryad	cf71f54248b611ea8957bafe6d70929e	B001-P1C1	TGATGGCCTATTGGG	singlet	1923	178	22729	...	76.718846	178	5.187386	2.731737	2	T cell	Naive CD4 T cell	Core naive CD4 T cell	-6.025837	10.903475
2	cf71fa1048b611ea8957bafe6d70929e	B001	impatient_familial_cuckoo	cf71fa1048b611ea8957bafe6d70929e	B001-P1C1	TGATGGCCTATTGGG	singlet	1246	204	11107	...	78.295025	204	5.323010	5.935409	3	Monocyte	CD14 monocyte	Core CD14 monocyte	13.830215	9.230867
3	cf71fb7848b611ea8957bafe6d70929e	B001	long_weakminded_roebuck	cf71fb7848b611ea8957bafe6d70929e	B001-P1C1	TGATGGCCTATTGGG	singlet	1118	77	12990	...	83.378160	77	4.356709	2.071006	1	T cell	Memory CD4 T cell	CM CD4 T cell	-2.623835	10.297518
4	cf71ffba48b611ea8957bafe6d70929e	B001	dastardly_wintery_airedale	cf71ffba48b611ea8957bafe6d70929e	B001-P1C1	TGATGGCCTATTGGG	singlet	1965	363	15979	...	69.089532	363	5.897154	6.871096	9	Monocyte	CD14 monocyte	Core CD14 monocyte	12.289608	13.853219

5 rows × 55 columns

In [17]:

# Directory that receives every file written by this notebook.
out_dir = 'output'
# exist_ok = True makes this safe to re-run and avoids the check-then-create race
os.makedirs(out_dir, exist_ok = True)

In [18]:

# Full metadata + UMAP table as CSV; the file name carries today's date
obs_out_csv = '{p}/ref_pbmc_labeled_meta_umap_{d}.csv'.format(p = out_dir, d = date.today())
obs.to_csv(obs_out_csv, index = False)

In [19]:

# Same table as the CSV above, written as parquet
obs_out_parquet = '{p}/ref_pbmc_labeled_meta_umap_{d}.parquet'.format(p = out_dir, d = date.today())
obs.to_parquet(obs_out_parquet, index = False)

In [20]:

# Barcode-to-label table: keep only the barcode and the three AIFI label columns
bc_anno = obs[['barcodes', 'AIFI_L1', 'AIFI_L2', 'AIFI_L3']]

In [21]:

# Barcode labels as CSV; the file name carries today's date
label_out_csv = '{p}/ref_pbmc_barcode_labels_{d}.csv'.format(p = out_dir, d = date.today())
bc_anno.to_csv(label_out_csv, index = False)

In [22]:

# Barcode labels as parquet
label_out_parquet = '{p}/ref_pbmc_barcode_labels_{d}.parquet'.format(p = out_dir, d = date.today())
bc_anno.to_parquet(label_out_parquet, index = False)

Output labeled AnnData¶

In [23]:

# Write the AnnData object, now carrying the labeled obs table, to a dated .h5ad file
out_h5ad = '{p}/ref_pbmc_labeled_{d}.h5ad'.format(p = out_dir, d = date.today())
adata.write_h5ad(out_h5ad)

Upload results to HISE¶

Finally, we'll use hisepy.upload.upload_files() to send a copy of our output to HISE to use for downstream analysis steps.

In [24]:

# Study space ID and dated title passed to hisepy.upload.upload_files() below
study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'
title = 'Labeled 10x 3-prime PBMC Reference {d}'.format(d = date.today())

In [25]:

# Every file read by this notebook: the clustered dataset plus all annotation tables
in_files = [h5ad_uuid] + anno_uuids

In [26]:

# Display the input UUIDs for the record
in_files

Out[26]:

['9db48bed-cd91-49ae-abd2-447ae478ca96',
 '6897e950-2719-4575-87d0-1d2307d5db63',
 'eff741c2-b0fe-4dff-9914-ff5dc89ee95e',
 'b57066f8-c0d5-4bfb-aa9b-79c743aa5886',
 '09f10d41-3170-4317-ba84-650268fcfcd2',
 '75cb6602-0179-4128-9171-83c269a2b340']

In [27]:

# All five files written above: labeled AnnData, metadata (CSV/parquet), labels (CSV/parquet)
out_files = [out_h5ad,
             obs_out_csv, obs_out_parquet,
             label_out_csv, label_out_parquet]

In [28]:

# Display the output paths for the record
out_files

Out[28]:

['output/ref_pbmc_labeled_2024-03-09.h5ad',
 'output/ref_pbmc_labeled_meta_umap_2024-03-09.csv',
 'output/ref_pbmc_labeled_meta_umap_2024-03-09.parquet',
 'output/ref_pbmc_barcode_labels_2024-03-09.csv',
 'output/ref_pbmc_barcode_labels_2024-03-09.parquet']

In [29]:

# Upload the outputs to the study space, recording the input file IDs alongside them.
# NOTE: the captured output shows a "Do you truly want to proceed?" prompt, so
# this call appears to need interactive confirmation.
hisepy.upload.upload_files(
    files = out_files,
    study_space_id = study_space_uuid,
    title = title,
    input_file_ids = in_files
)

output/ref_pbmc_labeled_2024-03-09.h5ad
output/ref_pbmc_labeled_meta_umap_2024-03-09.csv
output/ref_pbmc_labeled_meta_umap_2024-03-09.parquet
output/ref_pbmc_barcode_labels_2024-03-09.csv
output/ref_pbmc_barcode_labels_2024-03-09.parquet
you are trying to upload file_ids... ['output/ref_pbmc_labeled_2024-03-09.h5ad', 'output/ref_pbmc_labeled_meta_umap_2024-03-09.csv', 'output/ref_pbmc_labeled_meta_umap_2024-03-09.parquet', 'output/ref_pbmc_barcode_labels_2024-03-09.csv', 'output/ref_pbmc_barcode_labels_2024-03-09.parquet']. Do you truly want to proceed?

Out[29]:

{'trace_id': '3db6a8db-b7e2-4d17-b5a9-608bab76bc2c',
 'files': ['output/ref_pbmc_labeled_2024-03-09.h5ad',
  'output/ref_pbmc_labeled_meta_umap_2024-03-09.csv',
  'output/ref_pbmc_labeled_meta_umap_2024-03-09.parquet',
  'output/ref_pbmc_barcode_labels_2024-03-09.csv',
  'output/ref_pbmc_barcode_labels_2024-03-09.parquet']}

In [30]:

# Record the package versions used in this session for reproducibility
import session_info
session_info.show()

Out[30]:

Click to view session information

-----
anndata             0.10.3
hisepy              0.3.0
pandas              2.1.4
scanpy              1.9.6
session_info        1.0.0
-----

Click to view modules imported as dependencies

PIL                         10.0.1
anyio                       NA
arrow                       1.3.0
asttokens                   NA
attr                        23.2.0
attrs                       23.2.0
babel                       2.14.0
beatrix_jupyterlab          NA
brotli                      NA
cachetools                  5.3.1
certifi                     2024.02.02
cffi                        1.16.0
charset_normalizer          3.3.2
cloudpickle                 2.2.1
colorama                    0.4.6
comm                        0.1.4
cryptography                41.0.7
cycler                      0.10.0
cython_runtime              NA
dateutil                    2.8.2
db_dtypes                   1.1.1
debugpy                     1.8.0
decorator                   5.1.1
defusedxml                  0.7.1
deprecated                  1.2.14
exceptiongroup              1.2.0
executing                   2.0.1
fastjsonschema              NA
fqdn                        NA
google                      NA
greenlet                    2.0.2
grpc                        1.58.0
grpc_status                 NA
h5py                        3.10.0
idna                        3.6
igraph                      0.10.8
importlib_metadata          NA
ipykernel                   6.28.0
ipython_genutils            0.2.0
ipywidgets                  8.1.1
isoduration                 NA
jedi                        0.19.1
jinja2                      3.1.2
joblib                      1.3.2
json5                       NA
jsonpointer                 2.4
jsonschema                  4.20.0
jsonschema_specifications   NA
jupyter_events              0.9.0
jupyter_server              2.12.1
jupyterlab_server           2.25.2
jwt                         2.8.0
kiwisolver                  1.4.5
leidenalg                   0.10.1
llvmlite                    0.41.0
lz4                         4.3.2
markupsafe                  2.1.3
matplotlib                  3.8.0
matplotlib_inline           0.1.6
mpl_toolkits                NA
mpmath                      1.3.0
natsort                     8.4.0
nbformat                    5.9.2
numba                       0.58.0
numpy                       1.24.0
opentelemetry               NA
overrides                   NA
packaging                   23.2
parso                       0.8.3
pexpect                     4.8.0
pickleshare                 0.7.5
pkg_resources               NA
platformdirs                4.1.0
plotly                      5.18.0
prettytable                 3.9.0
prometheus_client           NA
prompt_toolkit              3.0.42
proto                       NA
psutil                      NA
ptyprocess                  0.7.0
pure_eval                   0.2.2
pyarrow                     13.0.0
pydev_ipython               NA
pydevconsole                NA
pydevd                      2.9.5
pydevd_file_utils           NA
pydevd_plugins              NA
pydevd_tracing              NA
pygments                    2.17.2
pynvml                      NA
pyparsing                   3.1.1
pyreadr                     0.5.0
pythonjsonlogger            NA
pytz                        2023.3.post1
referencing                 NA
requests                    2.31.0
rfc3339_validator           0.1.4
rfc3986_validator           0.1.1
rpds                        NA
scipy                       1.11.4
send2trash                  NA
shapely                     1.8.5.post1
six                         1.16.0
sklearn                     1.3.2
sniffio                     1.3.0
socks                       1.7.1
sql                         NA
sqlalchemy                  2.0.21
sqlparse                    0.4.4
stack_data                  0.6.2
sympy                       1.12
termcolor                   NA
texttable                   1.7.0
threadpoolctl               3.2.0
torch                       2.1.2+cu121
torchgen                    NA
tornado                     6.3.3
tqdm                        4.66.1
traitlets                   5.9.0
typing_extensions           NA
uri_template                NA
urllib3                     1.26.18
wcwidth                     0.2.12
webcolors                   1.13
websocket                   1.7.0
wrapt                       1.15.0
xarray                      2023.12.0
yaml                        6.0.1
zipp                        NA
zmq                         25.1.2
zoneinfo                    NA
zstandard                   0.22.0

-----
IPython             8.19.0
jupyter_client      8.6.0
jupyter_core        5.6.1
jupyterlab          4.1.2
notebook            6.5.4
-----
Python 3.10.13 | packaged by conda-forge | (main, Dec 23 2023, 15:36:39) [GCC 12.3.0]
Linux-5.15.0-1052-gcp-x86_64-with-glibc2.31
-----
Session information updated at 2024-03-09 05:21

In [ ]: