In this notebook, we use marker gene detection to select clusters that contain CD56-dim NK cells, then subset our dataset and perform a round of iterative clustering at multiple resolutions. At each resolution we identify marker genes, then generate plots that we can use to assess cell type identity.
The outputs of this analysis are used by our domain experts to assign cell type identities to our reference.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import concurrent.futures
from concurrent.futures import ProcessPoolExecutor
import copy
from datetime import date
import hisepy
import os
import pandas as pd
import re
import scanpy as sc
import scanpy.external as sce
These functions will help with subsetting and performing leiden clustering at multiple resolutions in parallel.
`select_clusters_by_gene_frac()` computes the fraction of cells in each cluster that express the provided gene (> 0 UMIs). This fraction is provided by `scanpy`'s dotplot function, which calculates these fractions for use in display. We then filter clusters based on the cutoff provided as a parameter to this function.
`run_leiden()` and `run_leiden_parallel()` enable parallel computation of multiple resolutions of Leiden clustering.
def select_clusters_by_gene_frac(adata, gene, cutoff, clusters = 'leiden'):
    """Return the clusters in which the fraction of cells expressing `gene` exceeds `cutoff`.

    The per-cluster fractions are taken from the `dot_size_df` table that
    scanpy's dotplot builds for display, so no figure needs to be shown.

    Parameters
    ----------
    adata
        AnnData object holding the expression data and cluster labels.
    gene
        Name of the gene to test; used as the dotplot `var_names` and as the
        column looked up in the fraction table.
    cutoff
        Threshold on the expressing fraction; only clusters strictly above
        this value are returned.
    clusters
        Name of the `adata.obs` column used to group cells.

    Returns
    -------
    list
        Labels of the clusters whose expressing fraction is > `cutoff`.
    """
    dot_plot = sc.pl.dotplot(
        adata,
        var_names = gene,
        groupby = clusters,
        return_fig = True
    )
    # One row per cluster; the selected column is the fraction for `gene`
    frac_by_cluster = dot_plot.dot_size_df[gene]
    passes_cutoff = frac_by_cluster > cutoff
    return frac_by_cluster.index[passes_cutoff].tolist()
def run_leiden(adata, resolution, key_added):
    """Run Leiden clustering at one resolution without modifying `adata`.

    Parameters
    ----------
    adata
        AnnData object with a neighbor graph already computed.
    resolution
        Resolution passed to `sc.tl.leiden`.
    key_added
        Name of the `.obs` column in which the cluster labels are stored.

    Returns
    -------
    pandas.DataFrame
        The `.obs` table of the clustered copy, including the `key_added`
        column.
    """
    # copy = True makes scanpy cluster a copy and return it, so the caller's
    # object is never touched; an additional deepcopy here would only
    # duplicate the whole dataset a second time.
    clustered = sc.tl.leiden(
        adata,
        resolution = resolution,
        key_added = key_added,
        copy = True
    )
    return clustered.obs
def run_leiden_parallel(adata, tasks, max_workers = 5):
    """Run Leiden clustering for several resolutions in separate processes.

    Parameters
    ----------
    adata
        AnnData object with a neighbor graph already computed. The cluster
        labels are added to its `.obs` in place.
    tasks
        Iterable of `(resolution, key_added)` pairs, one per clustering run.
    max_workers
        Maximum number of worker processes (default 5, the value that was
        previously hard-coded).

    Returns
    -------
    The same `adata` object, with one new `.obs` column per task.
    """
    # Materialize once: tasks is walked twice (submit, then collect), which
    # would silently drop results if a one-shot iterator were passed.
    tasks = list(tasks)

    with ProcessPoolExecutor(max_workers = max_workers) as executor:
        # Arguments are pickled when they are sent to a worker process, so
        # every worker already operates on its own copy of adata; no explicit
        # deepcopy is required for isolation.
        futures = [
            executor.submit(run_leiden, adata, resolution, key_added)
            for resolution, key_added in tasks
        ]
        results = [future.result() for future in futures]

    # Assign the results back to the original AnnData object
    for result, (_, key_added) in zip(results, tasks):
        adata.obs[key_added] = result[key_added]

    return adata
# Label used in output file names for this subset
cell_class = 'nk-cells-dim'

# UUID of the input .h5ad file in HISE, and the local cache directory
# that is expected to hold it
h5ad_uuid = 'b2a3d1ba-312d-41ec-9e51-8ef4c33192fe'
h5ad_path = '/home/jupyter/cache/{u}'.format(u = h5ad_uuid)

# Only fetch from HISE when the cache directory is not already present
if not os.path.isdir(h5ad_path):
    hise_res = hisepy.reader.cache_files([h5ad_uuid])

# Take the first entry in the cache directory
# NOTE(review): assumes the directory holds exactly one file - confirm
h5ad_filename = os.listdir(h5ad_path)[0]
h5ad_file = '{p}/{f}'.format(p = h5ad_path, f = h5ad_filename)

adata = sc.read_h5ad(h5ad_file)
adata
AnnData object with n_obs × n_vars = 160848 × 1357 obs: 'barcodes', 'batch_id', 'cell_name', 'cell_uuid', 'chip_id', 'hto_barcode', 'hto_category', 'n_genes', 'n_mito_umis', 'n_reads', 'n_umis', 'original_barcodes', 'pbmc_sample_id', 'pool_id', 'well_id', 'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'subject.race', 'subject.ethnicity', 'subject.birthYear', 'sample.visitName', 'sample.drawDate', 'file.id', 'subject.cmv', 'subject.bmi', 'celltypist.low', 'seurat.l1', 'seurat.l1.score', 'seurat.l2', 'seurat.l2.score', 'seurat.l2.5', 'seurat.l2.5.score', 'seurat.l3', 'seurat.l3.score', 'predicted_doublet', 'doublet_score', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'leiden', 'leiden_resolution_1', 'leiden_resolution_1.5', 'leiden_resolution_2' var: 'mito', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std' uns: 'celltypist.low_colors', 'hvg', 'leiden', 'leiden_colors', 'log1p', 'neighbors', 'pca', 'seurat.l2.5_colors', 'umap' obsm: 'X_pca', 'X_pca_harmony', 'X_umap' varm: 'PCs' obsp: 'connectivities', 'distances'
To get an overview of cluster identity, we'll use a set of marker genes that are expressed in major classes of NK cell types. To get to CD56-dim cells, we'll remove all of the other types & contaminants.
# Marker genes used to recognize the NK subtypes and contaminating
# populations that are excluded below
markers = [
    'NCAM1', # CD56
    'GZMK', # CD56-intermediate/high
    'IL32', # Adaptive NK
    'IL7R', # ILC
    'MKI67', # Proliferating
    'ISG15', # ISG-high
    'CD3D', # T cell doublets
    'HBB', # RBC doublets
    'PPBP' # Platelet doublets
]

# Overview of marker expression per cluster; swap_axes puts the genes on
# one axis and the clusters on the other, transposed from the default
sc.pl.dotplot(
    adata,
    groupby = 'leiden_resolution_1.5',
    var_names = markers,
    swap_axes = True
)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored dot_ax.scatter(x, y, **kwds)
To select clusters for our desired cell type, we'll use `select_clusters_by_gene_frac()`. We can also use it to select clusters that express off-target genes (like HBB and PPBP), and use these to filter our list of clusters.
# Reference view: all resolution-1.5 clusters, labeled directly on the UMAP
sc.pl.umap(adata, color = 'leiden_resolution_1.5', legend_loc = 'on data')
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Clusters in which more than 40% of cells express GZMK
# (CD56-intermediate/high marker)
gzmk_pos_cl = select_clusters_by_gene_frac(
    adata, gene = 'GZMK', cutoff = 0.4, clusters = 'leiden_resolution_1.5'
)
# Show the selected clusters on the UMAP for a visual check
sc.pl.umap(adata, color = 'leiden_resolution_1.5', groups = gzmk_pos_cl)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Clusters in which more than 60% of cells express IL32 (adaptive NK marker)
il32_pos_cl = select_clusters_by_gene_frac(
    adata, gene = 'IL32', cutoff = 0.6, clusters = 'leiden_resolution_1.5'
)
# Show the selected clusters on the UMAP for a visual check
sc.pl.umap(adata, color = 'leiden_resolution_1.5', groups = il32_pos_cl)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Clusters in which more than 80% of cells express IL7R (ILC marker)
il7r_pos_cl = select_clusters_by_gene_frac(
    adata, gene = 'IL7R', cutoff = 0.8, clusters = 'leiden_resolution_1.5'
)
# Show the selected clusters on the UMAP for a visual check
sc.pl.umap(adata, color = 'leiden_resolution_1.5', groups = il7r_pos_cl)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Clusters in which more than 60% of cells express MKI67 (proliferating cells)
mki67_pos_cl = select_clusters_by_gene_frac(
    adata, gene = 'MKI67', cutoff = 0.6, clusters = 'leiden_resolution_1.5'
)
# Show the selected clusters on the UMAP for a visual check
sc.pl.umap(adata, color = 'leiden_resolution_1.5', groups = mki67_pos_cl)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Clusters in which more than 60% of cells express ISG15 (ISG-high cells)
isg15_pos_cl = select_clusters_by_gene_frac(
    adata, gene = 'ISG15', cutoff = 0.6, clusters = 'leiden_resolution_1.5'
)
# Show the selected clusters on the UMAP for a visual check
sc.pl.umap(adata, color = 'leiden_resolution_1.5', groups = isg15_pos_cl)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Clusters in which more than 20% of cells express HBB (RBC doublets)
hbb_pos_cl = select_clusters_by_gene_frac(
    adata, gene = 'HBB', cutoff = 0.2, clusters = 'leiden_resolution_1.5'
)
# Show the selected clusters on the UMAP for a visual check
sc.pl.umap(adata, color = 'leiden_resolution_1.5', groups = hbb_pos_cl)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
# Clusters in which more than 20% of cells express PPBP (platelet doublets)
ppbp_pos_cl = select_clusters_by_gene_frac(
    adata, gene = 'PPBP', cutoff = 0.2, clusters = 'leiden_resolution_1.5'
)
# Show the selected clusters on the UMAP for a visual check
sc.pl.umap(adata, color = 'leiden_resolution_1.5', groups = ppbp_pos_cl)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
Here, we use Python's `set` class to keep the clusters we want and remove off-target hits.
# Every cluster flagged by any of the marker-based selections above
off_target_cl = set().union(
    gzmk_pos_cl,
    il32_pos_cl,
    il7r_pos_cl,
    mki67_pos_cl,
    isg15_pos_cl,
    hbb_pos_cl,
    ppbp_pos_cl
)

# Keep whatever remains; labels are strings, so the sort is lexicographic
all_cl = set(adata.obs['leiden_resolution_1.5'])
keep_cl = sorted(all_cl - off_target_cl)
keep_cl
['0', '1', '10', '15', '2', '4', '5', '8', '9']
# Visual check of the clusters that will be retained
sc.pl.umap(adata, color = 'leiden_resolution_1.5', groups = keep_cl)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
Now, we can filter the dataset to get the subset we're after.
# Boolean mask over cells: keep only those assigned to a retained cluster
adata_subset = adata[adata.obs['leiden_resolution_1.5'].isin(keep_cl)]
adata_subset.shape
(105676, 1357)
As in the original analysis of this dataset, we'll need to normalize, select highly variable genes, and run Harmony to integrate across our cohorts.
It's important that we redo this step for our subset, as gene variability may differ when computed within our subset of cells rather than across the entire set of PBMCs. This key feature selection step will affect our ability to cluster and identify cell types, so we do this iteratively for the subset we're using now.
We previously stored raw counts in `adata.raw` - we can now recover these original count data for analysis of the selected cells:
# Rebuild the subset from .raw so that all genes are available again
# instead of only the features kept in the parent object
adata_subset = adata_subset.raw.to_adata()
adata_subset.shape
(105676, 33538)
# Keep the un-normalized matrix in .raw so it can be recovered later
adata_subset.raw = adata_subset

# Normalize each cell to 10,000 total counts, then log-transform
# NOTE(review): the "already log-transformed" warning is presumably triggered
# by a 'log1p' entry inherited in .uns from the parent object - confirm
sc.pp.normalize_total(adata_subset, target_sum=1e4)
sc.pp.log1p(adata_subset)

# Re-select highly variable genes within this subset of cells
sc.pp.highly_variable_genes(adata_subset)

# Take an explicit copy: leaving a view here makes sc.pp.scale() emit
# "Received a view of an AnnData. Making a copy." and keeps the full-gene
# parent object referenced in the meantime.
adata_subset = adata_subset[:, adata_subset.var['highly_variable']].copy()
WARNING: adata.X seems to be already log-transformed.
# Scale the selected genes with scanpy's default settings before PCA
sc.pp.scale(adata_subset)
/opt/conda/lib/python3.10/site-packages/scanpy/preprocessing/_simple.py:843: UserWarning: Received a view of an AnnData. Making a copy. view_to_actual(adata)
# PCA on the scaled highly variable genes
sc.tl.pca(adata_subset, svd_solver='arpack')

# Harmony integration across cohorts, capped at 30 iterations; the
# corrected embedding is read below from obsm['X_pca_harmony']
sce.pp.harmony_integrate(
    adata_subset,
    'cohort.cohortGuid',
    max_iter_harmony = 30)
2024-03-04 21:34:12,206 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans... 2024-03-04 21:34:51,571 - harmonypy - INFO - sklearn.KMeans initialization complete. 2024-03-04 21:34:52,237 - harmonypy - INFO - Iteration 1 of 30 2024-03-04 21:36:09,707 - harmonypy - INFO - Iteration 2 of 30 2024-03-04 21:37:27,490 - harmonypy - INFO - Iteration 3 of 30 2024-03-04 21:38:45,545 - harmonypy - INFO - Iteration 4 of 30 2024-03-04 21:39:41,420 - harmonypy - INFO - Converged after 4 iterations
# Neighbor graph built on the Harmony-corrected embedding rather than the
# uncorrected PCA
sc.pp.neighbors(
    adata_subset,
    n_neighbors = 50,
    use_rep = 'X_pca_harmony',
    n_pcs = 30)

# 2D projection for visualization
sc.tl.umap(adata_subset, min_dist = 0.05)
# Create the output directory if needed (no-op when it already exists)
out_dir = 'output'
os.makedirs(out_dir, exist_ok = True)

# Checkpoint the integrated subset before the long clustering step
subset_h5ad = 'output/ref_pbmc_{c}_subset_{d}.h5ad'.format(c = cell_class, d = date.today())
adata_subset.write_h5ad(subset_h5ad)
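Here, we cluster the subset with the Leiden algorithm at a single resolution (1.5). The `run_leiden_parallel()` helper defined above can be used instead to perform clustering at multiple resolutions. This can be helpful for finding a set of clusters that correspond well to marker expression and distinguish functional cell type differences.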
%%time
# Leiden clustering of the subset at resolution 1.5; the labels are stored
# in adata_subset.obs under the key given below.
sc.tl.leiden(
    adata_subset,
    resolution = 1.5,
    key_added = 'leiden_resolution_1.5_nk-cells-dim'
)
IOStream.flush timed out IOStream.flush timed out IOStream.flush timed out IOStream.flush timed out IOStream.flush timed out IOStream.flush timed out
CPU times: user 21min, sys: 20.6 s, total: 21min 21s Wall time: 21min 12s
# Save the clustered object; this file is uploaded at the end of the notebook
clustered_h5ad = 'output/ref_pbmc_{c}_clustered_{d}.h5ad'.format(c = cell_class, d = date.today())
adata_subset.write_h5ad(clustered_h5ad)
Now that we've clustered, it's helpful to plot reference labels and clusters on our UMAP projection to see how they fall relative to each other.
# UMAP colored by the 'seurat.l2.5' reference labels
sc.pl.umap(
    adata_subset,
    color = ['seurat.l2.5'],
    size = 2,
    show = False,
    ncols = 1 ,
    frameon = False
)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
<Axes: title={'center': 'seurat.l2.5'}, xlabel='UMAP1', ylabel='UMAP2'>
# UMAP colored by the 'celltypist.low' reference labels
sc.pl.umap(
    adata_subset,
    color = ['celltypist.low'],
    size = 2,
    show = False,
    ncols = 1 ,
    frameon = False
)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
<Axes: title={'center': 'celltypist.low'}, xlabel='UMAP1', ylabel='UMAP2'>
CMV status is also helpful to view, as CMV can drive expansion of some cell types.
# UMAP colored by the subjects' CMV status
sc.pl.umap(
    adata_subset,
    color = ['subject.cmv'],
    size = 2,
    show = False,
    ncols = 1 ,
    frameon = False
)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
<Axes: title={'center': 'subject.cmv'}, xlabel='UMAP1', ylabel='UMAP2'>
# UMAP colored by the new subset-level Leiden clusters
sc.pl.umap(
    adata_subset,
    color = 'leiden_resolution_1.5_nk-cells-dim',
    size = 2,
    show = False,
    ncols = 1 ,
    frameon = False
)
/opt/conda/lib/python3.10/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored cax = scatter(
<Axes: title={'center': 'leiden_resolution_1.5_nk-cells-dim'}, xlabel='UMAP1', ylabel='UMAP2'>
# Attach the UMAP coordinates to the per-cell metadata and write it to CSV.
umap_mat = adata_subset.obsm['X_umap']

# The coordinate table must carry the same index as .obs: pandas aligns on
# index labels when a Series is assigned as a column, so a default integer
# index would match none of the cell labels and fill both columns with NaN.
umap_df = pd.DataFrame(
    umap_mat,
    columns = ['umap_1', 'umap_2'],
    index = adata_subset.obs.index
)

obs = adata_subset.obs
obs['umap_1'] = umap_df['umap_1']
obs['umap_2'] = umap_df['umap_2']

out_csv = 'output/ref_pbmc_{c}_clustered_umap_meta_{d}.csv'.format(c = cell_class, d = date.today())
obs.to_csv(out_csv)
/opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast 
values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: 
invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) 
/opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast 
values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: 
invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) 
/opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast values = values.astype(str) /opt/conda/lib/python3.10/site-packages/pandas/core/internals/blocks.py:2540: RuntimeWarning: invalid value encountered in cast 
values = values.astype(str)
# Write the same metadata table as parquet.
out_parquet = 'output/ref_pbmc_{c}_clustered_umap_meta_{d}.parquet'.format(c = cell_class, d = date.today())
# to_parquet() returns None when given a path, so its result is not assigned
# back to obs; the metadata table stays available afterwards.
obs.to_parquet(out_parquet)
# Go back to the matrix stored in .raw (saved before normalization and
# gene selection) so markers are tested across all genes
adata_subset = adata_subset.raw.to_adata()
sc.pp.normalize_total(adata_subset, target_sum=1e4)
sc.pp.log1p(adata_subset)

# Output path for the marker table of the resolution-1.5 clustering
res_csv = '{p}/ref_{c}_res{n}_markers_{d}.csv'.format(p = out_dir, c = cell_class, n = 1.5, d = date.today())

# Wilcoxon test comparing the clusters in the given .obs column;
# group = None returns the results for all clusters in one table
sc.tl.rank_genes_groups(adata_subset, 'leiden_resolution_1.5_nk-cells-dim', method = 'wilcoxon')
df = sc.get.rank_genes_groups_df(adata_subset, group = None)
df.to_csv(res_csv)

# Path recorded for inclusion in the upload list
marker_file = res_csv
WARNING: adata.X seems to be already log-transformed.
Finally, we'll use `hisepy.upload.upload_files()` to send a copy of our output to HISE to use for downstream analysis steps.
# Destination study space and title for the upload
study_space_uuid = '64097865-486d-43b3-8f94-74994e0a72e0'
title = 'NK CD56dim cell subclustering {d}'.format(d = date.today())

# Input file IDs passed along with the upload: the .h5ad read at the start
in_files = [h5ad_uuid]
in_files
['b2a3d1ba-312d-41ec-9e51-8ef4c33192fe']
# Files to upload: clustered object, metadata (CSV and parquet), marker table
out_files = [clustered_h5ad, out_csv, out_parquet, marker_file]
out_files
['output/ref_pbmc_nk-cells-dim_clustered_2024-03-04.h5ad', 'output/ref_pbmc_nk-cells-dim_clustered_umap_meta_2024-03-04.csv', 'output/ref_pbmc_nk-cells-dim_clustered_umap_meta_2024-03-04.parquet', 'output/ref_nk-cells-dim_res1.5_markers_2024-03-04.csv']
# Upload the outputs to the study space, linked to the input file IDs.
# NOTE: per the recorded output, this call prompts interactively (notebook
# selection and a confirmation) before uploading.
hisepy.upload.upload_files(
    files = out_files,
    study_space_id = study_space_uuid,
    title = title,
    input_file_ids = in_files
)
output/ref_pbmc_nk-cells-dim_clustered_2024-03-04.h5ad output/ref_pbmc_nk-cells-dim_clustered_umap_meta_2024-03-04.csv output/ref_pbmc_nk-cells-dim_clustered_umap_meta_2024-03-04.parquet output/ref_nk-cells-dim_res1.5_markers_2024-03-04.csv Cannot determine the current notebook. 1) /home/jupyter/scRNA-Reference-IH-A/03-Subclustering/09a-Python_NK_CD56dim.ipynb 2) /home/jupyter/scRNA-Reference-IH-A/03-Subclustering/11b-Python_T_cells_cd8-mait.ipynb 3) /home/jupyter/scRNA-Reference-IH-A/version_adaptation/04c-Python_check_treg.ipynb Please select (1-3)
you are trying to upload file_ids... ['output/ref_pbmc_nk-cells-dim_clustered_2024-03-04.h5ad', 'output/ref_pbmc_nk-cells-dim_clustered_umap_meta_2024-03-04.csv', 'output/ref_pbmc_nk-cells-dim_clustered_umap_meta_2024-03-04.parquet', 'output/ref_nk-cells-dim_res1.5_markers_2024-03-04.csv']. Do you truly want to proceed?
# Show session details for the environment used in this run
import session_info
session_info.show()