#!/usr/bin/env python
# coding: utf-8

# # Assign Other annotations
#
# To assemble our annotations, we'll read our Other cell data and assign our
# expert annotations to those clusters. We'll then inspect the annotations in
# our UMAP projections, and output final labels for these cells.

# In[1]:


import warnings

# Filters are installed before the heavy imports so that warnings raised while
# importing those packages are suppressed as well.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

from datetime import date
import hisepy
import os
import pandas as pd
import scanpy as sc


# ### Helper function
#
# This function makes it easy to pull csv files stored in HISE as a pandas
# DataFrame.

# In[2]:


def read_csv_uuid(csv_uuid):
    """Read a CSV file stored in HISE into a pandas DataFrame.

    The file is looked up under ``/home/jupyter/cache/<csv_uuid>``. If that
    directory does not exist yet, ``hisepy.reader.cache_files`` is called
    first to download it.

    Parameters
    ----------
    csv_uuid : str
        HISE file UUID of the CSV to read.

    Returns
    -------
    pandas.DataFrame
        Contents of the first file listed in the cache directory, read with
        the first CSV column as the index.
    """
    csv_path = '/home/jupyter/cache/{u}'.format(u = csv_uuid)
    if not os.path.isdir(csv_path):
        hisepy.reader.cache_files([csv_uuid])
    # NOTE(review): assumes the cache directory holds a single file per UUID;
    # os.listdir() order is arbitrary if there are several - confirm.
    csv_filename = os.listdir(csv_path)[0]
    csv_file = '{p}/{f}'.format(p = csv_path, f = csv_filename)
    df = pd.read_csv(csv_file, index_col = 0)
    return df


# ## Read subclustering results from HISE

# In[3]:


cell_class = 'other'


# In[4]:


h5ad_uuid = '1eb6ca8c-b8ed-4968-b515-c954497441dc'
h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)


# In[5]:


# Only download when the file is not already in the local cache.
if not os.path.isdir(h5ad_path):
    hisepy.reader.cache_files([h5ad_uuid])


# In[6]:


h5ad_filename = os.listdir(h5ad_path)[0]
h5ad_file = '{p}/{f}'.format(p = h5ad_path, f = h5ad_filename)


# In[7]:


adata = sc.read_h5ad(h5ad_file)


# In[8]:


adata


# ## Read annotations

# In[9]:


anno_uuid = '03817547-e2e3-412b-b36e-538d2bc74c87'
anno = read_csv_uuid(anno_uuid)


# In[10]:


anno.head()


# In[11]:


join_col = 'leiden_resolution_1'


# In[12]:


# Cast the cluster ids read from the CSV to categorical strings before
# joining them to the cluster labels in adata.obs.
anno[join_col] = anno[join_col].astype('string').astype('category')


# In[13]:


obs = adata.obs


# In[14]:


# Number of cells whose cluster has an annotation entry.
is_annotated = obs[join_col].isin(anno[join_col])
is_annotated.sum()


# In[15]:


# A left join leaves NaN labels for clusters missing from the annotation
# table; surface that instead of letting it pass silently into the outputs.
n_unannotated = int((~is_annotated).sum())
if n_unannotated > 0:
    warnings.warn(
        '{n} cells have a {c} value with no matching annotation'.format(
            n = n_unannotated, c = join_col
        )
    )

# validate = 'many_to_one' makes the merge fail immediately if a cluster
# appears more than once in the annotations, which would otherwise duplicate
# cells and break the assignment back to adata.obs.
obs_anno = obs.merge(anno, how = 'left', on = join_col, validate = 'many_to_one')


# In[16]:


# merge() discards the original index, so restore barcodes as the obs index
# (and keep them as a column) in the same step that updates adata.
adata.obs = obs_anno.set_index('barcodes', drop = False)


# In[17]:


adata.obs.head()


# In[18]:


sc.pl.umap(adata, color = ['AIFI_L1', 'AIFI_L2', 'AIFI_L3'], ncols = 1)


# In[19]:


sc.pl.umap(
    adata,
    color = ['leiden_resolution_1', 'leiden_resolution_1.5', 'leiden_resolution_2'],
    ncols = 1
)


# ## Output final annotations

# In[20]:


# reset_index() returns a new frame with a 0..n-1 index, so adding the UMAP
# columns below does not touch adata.obs.
obs = adata.obs
obs = obs.reset_index(drop = True)


# In[21]:


# umap_df also gets a default 0..n-1 index, so the column assignments line up
# row-for-row with obs.
umap_mat = adata.obsm['X_umap']
umap_df = pd.DataFrame(umap_mat, columns = ['umap_1', 'umap_2'])
obs['umap_1'] = umap_df['umap_1']
obs['umap_2'] = umap_df['umap_2']


# In[22]:


obs.head()


# In[23]:


out_dir = 'output'
os.makedirs(out_dir, exist_ok = True)

# Take the date once so every output file name and the upload title carry the
# same stamp even if the run crosses midnight.
today = date.today()


# In[24]:


obs_out_csv = '{p}/ref_pbmc_{c}_labeled_meta_umap_{d}.csv'.format(p = out_dir, c = cell_class, d = today)
obs.to_csv(obs_out_csv, index = False)


# In[25]:


obs_out_parquet = '{p}/ref_pbmc_{c}_labeled_meta_umap_{d}.parquet'.format(p = out_dir, c = cell_class, d = today)
obs.to_parquet(obs_out_parquet, index = False)


# In[26]:


bc_anno = obs[['barcodes', 'AIFI_L1', 'AIFI_L2', 'AIFI_L3']]


# In[27]:


label_out_csv = '{p}/ref_pbmc_{c}_barcode_labels_{d}.csv'.format(p = out_dir, c = cell_class, d = today)
bc_anno.to_csv(label_out_csv, index = False)


# In[28]:


label_out_parquet = '{p}/ref_pbmc_{c}_barcode_labels_{d}.parquet'.format(p = out_dir, c = cell_class, d = today)
bc_anno.to_parquet(label_out_parquet, index = False)


# ## Upload annotations to HISE
#
# Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our
# output to HISE to use for downstream analysis steps.

# In[29]:


study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'
title = 'Other barcode annotations {d}'.format(d = today)


# In[30]:


in_files = [h5ad_uuid, anno_uuid]


# In[31]:


in_files


# In[32]:


out_files = [obs_out_csv, obs_out_parquet, label_out_csv, label_out_parquet]


# In[33]:


out_files


# In[34]:


hisepy.upload.upload_files(
    files = out_files,
    study_space_id = study_space_uuid,
    title = title,
    input_file_ids = in_files
)


# In[35]:


# Imported here rather than at the top so that a missing session_info package
# cannot prevent the analysis and upload above from running.
import session_info
session_info.show()


# In[ ]: