#!/usr/bin/env python
# coding: utf-8
"""Prepare a DBiT-seq sample as a reference dataset.

Pipeline (notebook export, one step per cell):

1. Load the AnnData object from disk and compute QC metrics.
2. Keep a copy of the unnormalised matrix in ``adata.layers['counts']``.
3. Filter cells (``min_genes=10``) and genes (``min_cells=50``).
4. Drop genes whose names start with ``mt-``.
5. Normalise total counts per cell and log1p-transform ``adata.X``.
6. Build a Delaunay spatial graph and compute Moran's I for every gene.
7. Keep the ``n_svgs`` genes with the highest Moran's I and write the
   result to disk.
"""

# In[1]:

import warnings

# Kept ahead of the remaining imports, as in the original notebook, so that
# warnings raised while importing the libraries below are silenced as well.
warnings.filterwarnings('ignore')

import os

import numpy as np
import pandas as pd
import scanpy as sc
import squidpy as sq
import anndata as ad
import scipy as sp


# In[2]:

adata = sc.read_h5ad(
    '../../data/from_spatial_omics/DBiT-seq/'
    'DBiT-seq_liu2020high_GSM4364244_E11-FL-1L_gene_data.h5ad'
)


# In[3]:

# Notebook display of the object summary; a no-op when run as a script.
adata


# In[4]:

# Adds per-cell / per-gene QC columns (e.g. ``total_counts``) in place.
sc.pp.calculate_qc_metrics(adata, inplace=True)


# In[5]:

# Preserve the matrix as loaded before any normalisation touches ``adata.X``.
adata.layers['counts'] = adata.X.copy()


# In[6]:

sc.pp.filter_cells(adata, min_genes=10)
sc.pp.filter_genes(adata, min_cells=50)


# In[7]:

adata


# In[8]:

# NOTE(review): 'leiden' is not computed anywhere in this script, so it must
# already be present in ``adata.obs`` of the input file -- confirm, otherwise
# this call fails on the missing key.
sq.pl.spatial_scatter(
    adata,
    color=['leiden', 'n_genes', 'total_counts'],
    size=25,
    shape=None,
)


# In[9]:

# remove MT genes
non_mito_genes_list = [
    name for name in adata.var_names if not name.startswith('mt-')
]
# Subsetting returns a view; take an explicit copy because the next cell
# modifies ``adata.X`` in place.
adata = adata[:, non_mito_genes_list].copy()


# In[10]:

sc.pp.normalize_total(adata)
sc.pp.log1p(adata)


# In[11]:

sq.gr.spatial_neighbors(adata, coord_type="generic", delaunay=True)
# Results are stored in ``adata.uns["moranI"]`` and read back in the next cell.
sq.gr.spatial_autocorr(
    adata,
    mode="moran",
    n_perms=100,
    n_jobs=10,
    genes=adata.var_names,
)


# In[12]:

n_svgs = 200
# Rank genes by Moran's I (highest first) and keep the top ``n_svgs`` names.
sel_genes = (
    adata.uns["moranI"]["I"].sort_values(ascending=False).head(n_svgs).index.tolist()
)


# In[13]:

sq.pl.spatial_scatter(
    adata,
    color=sel_genes[:10],
    figsize=(5, 5),
    size=25,
    cmap="Reds",
    shape=None,
    use_raw=False,
)


# In[14]:

# select the top ``n_svgs`` spatially variable genes (by Moran's I) as reference
# Copy so that a standalone object, not a view of the full matrix, is written.
adata = adata[:, sel_genes].copy()


# In[15]:

adata


# In[16]:

output_path = '../../results/00_prepare_reference_data/30_DBiT_seq_E11_FL_1L.h5ad'
# Create the target directory up front so the write cannot fail on a missing
# folder after the permutation test above has already been run.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
adata.write_h5ad(output_path)