import warnings
warnings.filterwarnings('ignore')
import os
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import anndata as ad
import scipy as sp
# Load the DBiT-seq E10 whole-embryo AnnData object from disk and display its summary.
input_path = '../../data/from_spatial_omics/DBiT-seq/DBiT-seq_liu2020high_E10_whole_gene_best_data.h5ad'
adata = sc.read_h5ad(input_path)
adata
AnnData object with n_obs × n_vars = 936 × 22802 obs: 'leiden' var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm' uns: 'hvg', 'leiden', 'leiden_colors', 'moranI', 'neighbors', 'pca', 'spatial_neighbors', 'umap' obsm: 'X_pca', 'X_umap', 'spatial' varm: 'PCs' obsp: 'connectivities', 'distances', 'spatial_connectivities', 'spatial_distances'
# Annotate adata.obs / adata.var with QC metrics, then keep a copy of the
# current X in the 'counts' layer before any later transformation.
# NOTE(review): assumes X still holds unnormalized counts here - confirm.
sc.pp.calculate_qc_metrics(adata, inplace=True)
adata.layers['counts'] = adata.X.copy()

# Filtering thresholds: an observation needs at least this many detected
# genes, and a gene must be detected in at least this many observations.
min_genes_per_obs = 10
min_obs_per_gene = 50
sc.pp.filter_cells(adata, min_genes=min_genes_per_obs)
sc.pp.filter_genes(adata, min_cells=min_obs_per_gene)
adata
AnnData object with n_obs × n_vars = 936 × 12060 obs: 'leiden', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'n_genes' var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells' uns: 'hvg', 'leiden', 'leiden_colors', 'moranI', 'neighbors', 'pca', 'spatial_neighbors', 'umap' obsm: 'X_pca', 'X_umap', 'spatial' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances', 'spatial_connectivities', 'spatial_distances'
# Spatial overview: cluster labels alongside the per-observation gene count
# and total counts. shape=None draws the observations as plain scatter points.
qc_panels = ['leiden', 'n_genes', 'total_counts']
sq.pl.spatial_scatter(
    adata,
    color=qc_panels,
    shape=None,
    size=25,
)
WARNING: Please specify a valid `library_id` or set it permanently in `adata.uns['spatial']`
# Remove mitochondrial genes (names starting with 'mt-').
non_mito_genes_list = [name for name in adata.var_names if not name.startswith('mt-')]
# Column subsetting returns a *view* of the parent AnnData. Take an explicit
# copy so the in-place normalization below runs on a real object instead of
# relying on an implicit view-to-copy conversion (whose warning is silenced
# by the global warnings filter at the top of the file).
adata = adata[:, non_mito_genes_list].copy()

# Normalize total counts per observation and log1p-transform X in place.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Build the spatial neighbor graph from the generic coordinates using a
# Delaunay triangulation, then compute Moran's I for every remaining gene
# with 100 permutations. Results are stored in adata.uns["moranI"].
sq.gr.spatial_neighbors(adata, coord_type="generic", delaunay=True)
sq.gr.spatial_autocorr(
    adata,
    mode="moran",
    n_perms=100,
    n_jobs=10,
    genes=adata.var_names,
)
100%|██████████| 100/100 [00:10<00:00, 9.69/s]
# Number of spatially variable genes to keep, ranked by Moran's I.
n_svgs = 200

# Rank genes by Moran's I (highest first) and keep the names of the top n_svgs.
moran_scores = adata.uns["moranI"]["I"]
ranked_scores = moran_scores.sort_values(ascending=False)
sel_genes = ranked_scores.head(n_svgs).index.tolist()

# Visual sanity check: spatial expression of the ten highest-ranked genes,
# plotted from adata.X (use_raw=False).
top_preview_genes = sel_genes[:10]
sq.pl.spatial_scatter(
    adata,
    color=top_preview_genes,
    figsize=(5, 5),
    size=25,
    cmap="Reds",
    shape=None,
    use_raw=False,
)
WARNING: Please specify a valid `library_id` or set it permanently in `adata.uns['spatial']`
# Keep only the top `n_svgs` spatially variable genes (ranked by Moran's I)
# as the reference gene set. Copy the subset so `adata` is a standalone
# AnnData object rather than a view of the full matrix.
adata = adata[:, sel_genes].copy()
adata
View of AnnData object with n_obs × n_vars = 936 × 200 obs: 'leiden', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'n_genes' var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells' uns: 'hvg', 'leiden', 'leiden_colors', 'moranI', 'neighbors', 'pca', 'spatial_neighbors', 'umap', 'log1p' obsm: 'X_pca', 'X_umap', 'spatial' varm: 'PCs' layers: 'counts' obsp: 'connectivities', 'distances', 'spatial_connectivities', 'spatial_distances'
# Save the reduced reference dataset. Create the results directory first so
# the write does not fail when the output folder does not exist yet.
output_path = '../../results/00_prepare_reference_data/28_DBiT_seq_E10_whole.h5ad'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
adata.write_h5ad(output_path)