#!/usr/bin/env python # coding: utf-8 # In[22]: import celltypist from celltypist import models import scanpy as sc import pandas as pd import numpy as np import anndata import re import h5py import scipy.sparse as scs import concurrent.futures import scanpy.external as sce import gc from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ProcessPoolExecutor import copy # In[2]: def run_leiden(adata, resolution, key_added): # Make a copy of adata for thread safety adata_copy = copy.deepcopy(adata) adata_clustering = sc.tl.leiden(adata_copy, resolution=resolution, key_added=key_added, copy=True) return adata_clustering.obs def run_leiden_parallel(adata, tasks): with ProcessPoolExecutor(max_workers=5) as executor: # Make deep copies of adata for each task to ensure thread safety futures = [executor.submit(run_leiden, copy.deepcopy(adata), resolution, key_added) for resolution, key_added in tasks] results = [future.result() for future in futures] # Assign the results back to the original AnnData object for result, (_, key_added) in zip(results, tasks): adata.obs[key_added] = result[key_added] return adata # In[3]: adata=sc.read_h5ad('adata_processed.h5ad') # In[4]: Myeloid_cell_population=["0","12","14","15","16","18","21","23","24"] # In[5]: adata_subset=adata[adata.obs['leiden'].isin(Myeloid_cell_population)] # In[6]: del adata gc.collect() # In[7]: adata_subset=adata_subset.raw.to_adata() # In[8]: adata_subset.write_h5ad('Myeloid/Myeloidcells_raw.h5ad') # In[9]: adata_subset=sc.read_h5ad('Myeloid/Myeloidcells_raw.h5ad') # In[10]: adata_subset.raw= adata_subset # In[11]: sc.pp.normalize_total(adata_subset, target_sum=1e4) # In[12]: sc.pp.log1p(adata_subset) sc.pp.highly_variable_genes(adata_subset) adata_subset = adata_subset[:, adata_subset.var_names[adata_subset.var['highly_variable']]] # In[13]: sc.pp.scale(adata_subset) # In[14]: sc.tl.pca(adata_subset, svd_solver='arpack') # In[15]: sce.pp.harmony_integrate(adata_subset, 'cohort.cohortGuid',max_iter_harmony = 30) # In[ ]: sc.pp.neighbors(adata_subset, n_neighbors=50,use_rep='X_pca_harmony', n_pcs=30) sc.tl.umap(adata_subset,min_dist=0.05) # In[19]: adata_subset.write_h5ad('Myeloid/Myeloidcells_processed_pre_leiden.h5ad') # In[ ]: tasks = [(1, "leiden_resolution_1"),(1.5, "leiden_resolution_1.5"),(2, "leiden_resolution_2"),(2.5, "leiden_resolution_2.5"),(3, "leiden_resolution_3")] adata_subset = run_leiden_parallel(adata_subset, tasks) # In[ ]: adata_subset.write_h5ad('Myeloid/Myeloidcells_processed.h5ad') # In[ ]: adata_subset=adata_subset.raw.to_adata() sc.pp.normalize_total(adata_subset, target_sum=1e4) sc.pp.log1p(adata_subset) sc.tl.rank_genes_groups(adata_subset, 'leiden_resolution_1', method='wilcoxon') df_resolution_1=sc.get.rank_genes_groups_df(adata_subset,group=None) df_resolution_1.to_csv('Myeloid/Myeloid_res1.csv') sc.tl.rank_genes_groups(adata_subset, 'leiden_resolution_1.5', method='wilcoxon') df_resolution_1_5=sc.get.rank_genes_groups_df(adata_subset,group=None) df_resolution_1_5.to_csv('Myeloid/Myeloid_res1.5.csv') sc.tl.rank_genes_groups(adata_subset, 'leiden_resolution_2', method='wilcoxon') df_resolution_2=sc.get.rank_genes_groups_df(adata_subset,group=None) df_resolution_2.to_csv('Myeloid/Myeloid_res2.csv') sc.tl.rank_genes_groups(adata_subset, 'leiden_resolution_2.5', method='wilcoxon') df_resolution_2_5=sc.get.rank_genes_groups_df(adata_subset,group=None) df_resolution_2_5.to_csv('Myeloid/Myeloid_res2.5.csv') sc.tl.rank_genes_groups(adata_subset, 'leiden_resolution_3', method='wilcoxon') df_resolution_3=sc.get.rank_genes_groups_df(adata_subset,group=None) df_resolution_3.to_csv('Myeloid/Myeloid_res3.csv') # In[18]: adata_subset # In[ ]: adata_subset=sc.read_h5ad('Myeloid/Myeloidcells_processed.h5ad') # In[ ]: sc.tl.umap(adata_subset,min_dist=0.05) # In[ ]: adata_subset.write_h5ad('Myeloid/Myeloidcells_processed_20231107.h5ad') # In[ ]: sc.pl.umap(adata_subset, color=['doublet_score'], size=2,show=False,ncols=1 ,frameon=False) # In[ ]: sc.pl.umap(adata_subset, color=['predicted.celltype.l2.5'], size=2,show=False,ncols=1 ,frameon=False) # In[ ]: sc.pl.umap(adata_subset, color=['predicted_labels_celltypist'], size=2,show=False,ncols=1 ,frameon=False) # In[ ]: sc.pl.umap(adata_subset, color=['majority_voting_celltypist'], size=2,show=False,ncols=1 ,frameon=False) # In[ ]: sc.pl.umap(adata_subset, color=['leiden'], size=0.5,show=False,ncols=1 ,frameon=False) # In[ ]: sc.pl.umap(adata_subset, color=['leiden_resolution_1','leiden_resolution_1.5','leiden_resolution_2'], size=0.5,show=False,ncols=1 ,frameon=False) # In[ ]: sc.pl.umap(adata_subset, color=['CMV.IgG.Serology.Result.Interpretation'], size=2,show=False,ncols=1 ,frameon=False) # In[ ]: df=pd.read_csv('Myeloid/Myeloid_res2.csv') # In[ ]: df=df.groupby('group').head(20).reset_index(drop=True) # In[ ]: import matplotlib.pyplot as plt groups = df.groupby('group') fig, axs = plt.subplots(9, 6, figsize=(10, 30), squeeze=False) # Loop through each group and create a scatter plot in the corresponding subplot for i, (name, group) in enumerate(groups): row, col = i // 6, i % 6 axs[row, col].scatter(group['scores'], group['names']) axs[row, col].invert_yaxis() axs[row, col].set_title(str(name)+" vs Rest") axs[row, col].set_xlabel('U scores') axs[row, col].set_ylabel('Gene') fig.tight_layout() plt.savefig('scatter_plot.png') # In[ ]: adata_subset=sc.read_h5ad('Myeloid/Myeloidcells_processed_20231107.h5ad') # In[ ]: adata_subset=adata_subset.raw.to_adata() # In[ ]: sc.pp.normalize_total(adata_subset, target_sum=1e4) # In[ ]: sc.pp.log1p(adata_subset) # In[ ]: sc.pl.umap(adata_subset, color=['ITGAX','MS4A1','CD22'],use_raw=False, size=2,show=False,ncols=1 ,frameon=False) # In[ ]: