import warnings
warnings.filterwarnings('ignore')
import os
import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import anndata as ad
import scipy as sp
dataset = '07_10x_Visium_Human_Breast_Cancer'
adata = sq.read.visium(f'../../data/reference_data/{dataset}')
adata
AnnData object with n_obs × n_vars = 4325 × 36601 obs: 'in_tissue', 'array_row', 'array_col' var: 'gene_ids', 'feature_types', 'genome' uns: 'spatial' obsm: 'spatial'
adata.var_names_make_unique()
sc.pp.calculate_qc_metrics(adata, inplace=True)
sc.pp.filter_cells(adata, min_genes=100)
sc.pp.filter_genes(adata, min_cells=50)
adata.layers['counts'] = adata.X.copy()
sq.pl.spatial_scatter(adata, color=['n_genes', 'total_counts'])
# remove MT genes
non_mito_genes_list = [name for name in adata.var_names if not name.startswith('MT-')]
adata = adata[:, non_mito_genes_list]
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
adata
AnnData object with n_obs × n_vars = 4263 × 14906 obs: 'in_tissue', 'array_row', 'array_col', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'n_genes' var: 'gene_ids', 'feature_types', 'genome', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells' uns: 'spatial', 'log1p' obsm: 'spatial' layers: 'counts'
sq.gr.spatial_neighbors(adata, coord_type="grid", delaunay=False)
sq.gr.spatial_autocorr(adata, mode="moran",
n_perms=100, n_jobs=10,
genes=adata.var_names)
100%|██████████| 100/100 [00:24<00:00, 4.13/s]
adata.uns["moranI"]["I"]
SCGB2A2 0.735714 CYP2A6 0.626647 PIP 0.616256 SCGB1D2 0.603673 DCD 0.593345 ... NAALADL1 -0.015524 C16orf46 -0.015939 SPINDOC -0.017712 ITGA2 -0.017847 NKRF -0.020196 Name: I, Length: 14906, dtype: float64
n_svgs = 200
sel_genes = (
adata.uns["moranI"]["I"].sort_values(ascending=False).head(n_svgs).index.tolist()
)
sq.pl.spatial_scatter(
adata, color=sel_genes[:20], figsize=(5, 5), size=1.5,
cmap="Reds", img=False, use_raw=False
)
# select top 50 variable genes as reference
adata = adata[:, sel_genes]
adata
View of AnnData object with n_obs × n_vars = 4263 × 200 obs: 'in_tissue', 'array_row', 'array_col', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'n_genes' var: 'gene_ids', 'feature_types', 'genome', 'n_cells_by_counts', 'mean_counts', 'log1p_mean_counts', 'pct_dropout_by_counts', 'total_counts', 'log1p_total_counts', 'n_cells' uns: 'spatial', 'log1p', 'spatial_neighbors', 'moranI' obsm: 'spatial' layers: 'counts' obsp: 'spatial_connectivities', 'spatial_distances'
adata.write_h5ad(f'../../results/00_prepare_reference_data/{dataset}.h5ad')