import celltypist
from celltypist import models
import scanpy as sc
import pandas as pd
import numpy as np
import anndata
import re
import h5py
import scipy.sparse as scs
import concurrent.futures
import scanpy.external as sce
import gc
from concurrent.futures import ProcessPoolExecutor
import copy
# --- Part 1: full T-cell object, clusters at Leiden resolution 1.5 ----------
# Load the processed T-cell AnnData object.
adata = sc.read_h5ad('/home/jupyter/reference_generating_new/T/Tcells_processed_2023-11-23.h5ad')
np.shape(adata.obs)  # recorded result in the original file: (1193859, 57)

# Per-cluster annotation table. Cluster ids are converted to str before the
# join (presumably because the cluster labels in adata.obs are strings --
# verify against the h5ad).
T_Cells_Annotation = pd.read_csv('T_Cells_Res1.5_Annotation.csv')
T_Cells_Annotation['leiden_resolution_1.5'] = T_Cells_Annotation['leiden_resolution_1.5'].map(str)

# Keep only clusters whose L2 and L3 labels are both flagged 'Yes'.
T_Cells_Annotation_filtered = T_Cells_Annotation.query("AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'")

# DataFrame.merge on a column ignores the left index and returns a fresh
# integer index; assigning that frame to .obs would replace the per-cell index.
# The original index is therefore put back after the merge.
# validate="many_to_one" guarantees one annotation row per cluster, so the
# merged frame stays row-for-row aligned with the cells.
adata.obs = adata.obs.merge(
    T_Cells_Annotation_filtered,
    on=["leiden_resolution_1.5"],
    how="left",
    validate="many_to_one",
).set_axis(adata.obs.index, axis=0)

# Export the cells that belong to a finalised cluster.
df_Part_1 = adata.obs[
    adata.obs['leiden_resolution_1.5'].isin(T_Cells_Annotation_filtered['leiden_resolution_1.5'])
]
df_Part_1.to_parquet("Annotation_Part_1.parquet")
np.shape(df_Part_1)  # recorded result in the original file: (886753, 63)
# --- Part 2: sub-clustering of C5/C12 at Leiden resolution 3 ----------------
# Per-cluster annotation table; cluster ids are converted to str before the join.
T_Cells_Annotation_C5_C12 = pd.read_csv('T_Cells_Res3_C5_C12_Annotation.csv')
T_Cells_Annotation_C5_C12['leiden_resolution_3_C5_C12'] = T_Cells_Annotation_C5_C12['leiden_resolution_3_C5_C12'].map(str)

# Keep only clusters whose L2 and L3 labels are both flagged 'Yes'.
T_Cells_Annotation_C5_C12 = T_Cells_Annotation_C5_C12.query("AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'")

adata_C5_C12 = sc.read_h5ad('/home/jupyter/reference_generating_new/T/Tcells_processed_LV2_C5_C12_2023-11-28_redo.h5ad')

# DataFrame.merge on a column ignores the left index and returns a fresh
# integer index; assigning that frame to .obs would replace the per-cell index.
# The original index is therefore put back after the merge.
# validate="many_to_one" guarantees one annotation row per cluster, so the
# merged frame stays row-for-row aligned with the cells.
adata_C5_C12.obs = adata_C5_C12.obs.merge(
    T_Cells_Annotation_C5_C12,
    on=["leiden_resolution_3_C5_C12"],
    how="left",
    validate="many_to_one",
).set_axis(adata_C5_C12.obs.index, axis=0)

# Export the cells that belong to a finalised cluster.
df_Part_2 = adata_C5_C12.obs[
    adata_C5_C12.obs['leiden_resolution_3_C5_C12'].isin(T_Cells_Annotation_C5_C12['leiden_resolution_3_C5_C12'])
]
df_Part_2.to_parquet("Annotation_Part_2.parquet")
np.shape(df_Part_2)  # recorded result in the original file: (114083, 66)
np.shape(adata_C5_C12.obs)  # recorded result in the original file: (123022, 66)

# Bookkeeping: per the recorded results, clusters '8' and '14' account for
# exactly the rows that were not exported in Part 2.
np.shape(adata_C5_C12.obs.query("leiden_resolution_3_C5_C12.isin(['8','14'])"))  # recorded: (8939, 66)
(8939+114083) == np.shape(adata_C5_C12.obs)[0]  # recorded: True
# --- Part 3: sub-clustering of C11 at Leiden resolution 1.5 -----------------
# Per-cluster annotation table; cluster ids are converted to str before the join.
T_Cells_Annotation_C11 = pd.read_csv('T_Cells_Res1.5_C11_Annotation.csv')
T_Cells_Annotation_C11['leiden_resolution_1.5_C11'] = T_Cells_Annotation_C11['leiden_resolution_1.5_C11'].map(str)

# Keep only clusters whose L2 and L3 labels are both flagged 'Yes'.
T_Cells_Annotation_C11 = T_Cells_Annotation_C11.query("AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'")

adata_C11 = sc.read_h5ad('/home/jupyter/reference_generating_new/T/Tcells_processed_LV2_C11_2023-11-27.h5ad')

# DataFrame.merge on a column ignores the left index and returns a fresh
# integer index; assigning that frame to .obs would replace the per-cell index.
# The original index is therefore put back after the merge.
# validate="many_to_one" guarantees one annotation row per cluster, so the
# merged frame stays row-for-row aligned with the cells.
adata_C11.obs = adata_C11.obs.merge(
    T_Cells_Annotation_C11,
    on=["leiden_resolution_1.5_C11"],
    how="left",
    validate="many_to_one",
).set_axis(adata_C11.obs.index, axis=0)

# Export the cells that belong to a finalised cluster.
df_Part_3 = adata_C11.obs[
    adata_C11.obs['leiden_resolution_1.5_C11'].isin(T_Cells_Annotation_C11['leiden_resolution_1.5_C11'])
]
df_Part_3.to_parquet("Annotation_Part_3.parquet")
np.shape(df_Part_3)  # recorded result in the original file: (48095, 64)
np.shape(adata_C11.obs)  # recorded result in the original file: (50060, 64)

# Bookkeeping: per the recorded results, cluster '8' accounts for exactly the
# rows that were not exported in Part 3.
np.shape(adata_C11.obs.query("`leiden_resolution_1.5_C11`.isin(['8'])"))  # recorded: (1965, 64)
(1965+48095) == np.shape(adata_C11.obs)[0]  # recorded: True
# --- Part 4: sub-clustering of C13 at Leiden resolution 1.5 -----------------
# Per-cluster annotation table; cluster ids are converted to str before the join.
T_Cells_Annotation_C13 = pd.read_csv('T_Cells_Res1.5_C13_Annotation.csv')
T_Cells_Annotation_C13['leiden_resolution_1.5_C13'] = T_Cells_Annotation_C13['leiden_resolution_1.5_C13'].map(str)

# Keep only clusters whose L2 and L3 labels are both flagged 'Yes'.
T_Cells_Annotation_C13 = T_Cells_Annotation_C13.query("AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'")

adata_C13 = sc.read_h5ad('/home/jupyter/reference_generating_new/T/Tcells_processed_LV2_C13_2023-11-27_retry.h5ad')

# DataFrame.merge on a column ignores the left index and returns a fresh
# integer index; assigning that frame to .obs would replace the per-cell index.
# The original index is therefore put back after the merge.
# validate="many_to_one" guarantees one annotation row per cluster, so the
# merged frame stays row-for-row aligned with the cells.
adata_C13.obs = adata_C13.obs.merge(
    T_Cells_Annotation_C13,
    on=["leiden_resolution_1.5_C13"],
    how="left",
    validate="many_to_one",
).set_axis(adata_C13.obs.index, axis=0)

# Export the cells that belong to a finalised cluster.
df_Part_4 = adata_C13.obs[
    adata_C13.obs['leiden_resolution_1.5_C13'].isin(T_Cells_Annotation_C13['leiden_resolution_1.5_C13'])
]
df_Part_4.to_parquet("Annotation_Part_4.parquet")
np.shape(df_Part_4)  # recorded result in the original file: (36364, 64)
np.shape(adata_C13.obs)  # recorded result in the original file: (42191, 64)

# Bookkeeping: per the recorded results, clusters '1' and '11' account for
# exactly the rows that were not exported in Part 4.
np.shape(adata_C13.obs.query("`leiden_resolution_1.5_C13`.isin(['1','11'])"))  # recorded: (5827, 64)
(5827+36364) == np.shape(adata_C13.obs)[0]  # recorded: True
# --- Part 5: sub-clustering of C14 at Leiden resolution 1.5 -----------------
# Per-cluster annotation table; cluster ids are converted to str before the join.
T_Cells_Annotation_C14 = pd.read_csv('T_Cells_Res1.5_C14_Annotation.csv')
T_Cells_Annotation_C14['leiden_resolution_1.5_C14'] = T_Cells_Annotation_C14['leiden_resolution_1.5_C14'].map(str)

# Keep only clusters whose L2 and L3 labels are both flagged 'Yes'.
T_Cells_Annotation_C14 = T_Cells_Annotation_C14.query("AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'")

adata_C14 = sc.read_h5ad('/home/jupyter/reference_generating_new/T/Tcells_processed_LV2_C14_2023-11-27.h5ad')

# DataFrame.merge on a column ignores the left index and returns a fresh
# integer index; assigning that frame to .obs would replace the per-cell index.
# The original index is therefore put back after the merge.
# validate="many_to_one" guarantees one annotation row per cluster, so the
# merged frame stays row-for-row aligned with the cells.
adata_C14.obs = adata_C14.obs.merge(
    T_Cells_Annotation_C14,
    on=["leiden_resolution_1.5_C14"],
    how="left",
    validate="many_to_one",
).set_axis(adata_C14.obs.index, axis=0)

# Export the cells that belong to a finalised cluster.
df_Part_5 = adata_C14.obs[
    adata_C14.obs['leiden_resolution_1.5_C14'].isin(T_Cells_Annotation_C14['leiden_resolution_1.5_C14'])
]
df_Part_5.to_parquet("Annotation_Part_5.parquet")

# The two recorded shapes are identical, i.e. no rows were dropped in the
# original run.
np.shape(df_Part_5)  # recorded result in the original file: (40051, 64)
np.shape(adata_C14.obs)  # recorded result in the original file: (40051, 64)
# --- Part 6: C15 (with gdT from C5/C12, C11, C13) at Leiden resolution 1.5 --
# Per-cluster annotation table; cluster ids are converted to str before the join.
T_Cells_Annotation_C15 = pd.read_csv('T_Cells_Res1.5_C15_Annotation.csv')
T_Cells_Annotation_C15['leiden_resolution_1.5_C15_with_gdT_from_C5_C12_C11_C13'] = (
    T_Cells_Annotation_C15['leiden_resolution_1.5_C15_with_gdT_from_C5_C12_C11_C13'].map(str)
)

# Keep only clusters whose L2 and L3 labels are both flagged 'Yes'.
T_Cells_Annotation_C15 = T_Cells_Annotation_C15.query("AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'")

adata_C15 = sc.read_h5ad('/home/jupyter/reference_generating_new/T/Tcells_processed_LV2_C15_with_gdT_from_C5_C12_C11_C13_2023-11-29.h5ad')

# DataFrame.merge on a column ignores the left index and returns a fresh
# integer index; assigning that frame to .obs would replace the per-cell index.
# The original index is therefore put back after the merge.
# validate="many_to_one" guarantees one annotation row per cluster, so the
# merged frame stays row-for-row aligned with the cells.
adata_C15.obs = adata_C15.obs.merge(
    T_Cells_Annotation_C15,
    on=["leiden_resolution_1.5_C15_with_gdT_from_C5_C12_C11_C13"],
    how="left",
    validate="many_to_one",
).set_axis(adata_C15.obs.index, axis=0)

# Export the cells that belong to a finalised cluster.
df_Part_6 = adata_C15.obs[
    adata_C15.obs['leiden_resolution_1.5_C15_with_gdT_from_C5_C12_C11_C13'].isin(
        T_Cells_Annotation_C15['leiden_resolution_1.5_C15_with_gdT_from_C5_C12_C11_C13']
    )
]
df_Part_6.to_parquet("Annotation_Part_6.parquet")

# The two recorded shapes are identical, i.e. no rows were dropped in the
# original run.
np.shape(df_Part_6)  # recorded result in the original file: (51009, 66)
np.shape(adata_C15.obs)  # recorded result in the original file: (51009, 66)
# --- Part 7: sub-clustering of C19 at Leiden resolution 2 -------------------
# Per-cluster annotation table; cluster ids are converted to str before the join.
T_Cells_Annotation_C19 = pd.read_csv('T_Cells_Res2_C19_Annotation.csv')
T_Cells_Annotation_C19['leiden_resolution_2_C19'] = T_Cells_Annotation_C19['leiden_resolution_2_C19'].map(str)

# Keep only clusters whose L2 and L3 labels are both flagged 'Yes'.
T_Cells_Annotation_C19 = T_Cells_Annotation_C19.query("AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'")

adata_C19 = sc.read_h5ad('/home/jupyter/reference_generating_new/T/Tcells_processed_LV2_C19_2023-12-05.h5ad')

# DataFrame.merge on a column ignores the left index and returns a fresh
# integer index; assigning that frame to .obs would replace the per-cell index.
# The original index is therefore put back after the merge.
# validate="many_to_one" guarantees one annotation row per cluster, so the
# merged frame stays row-for-row aligned with the cells.
adata_C19.obs = adata_C19.obs.merge(
    T_Cells_Annotation_C19,
    on=["leiden_resolution_2_C19"],
    how="left",
    validate="many_to_one",
).set_axis(adata_C19.obs.index, axis=0)

# Export the cells that belong to a finalised cluster.
df_Part_7 = adata_C19.obs[
    adata_C19.obs['leiden_resolution_2_C19'].isin(T_Cells_Annotation_C19['leiden_resolution_2_C19'])
]
df_Part_7.to_parquet("Annotation_Part_7.parquet")

# The two recorded shapes are identical, i.e. no rows were dropped in the
# original run.
np.shape(df_Part_7)  # recorded result in the original file: (13029, 65)
np.shape(adata_C19.obs)  # recorded result in the original file: (13029, 65)
# --- Part 8: sub-clustering of C21 at Leiden resolution 1.5 -----------------
# Per-cluster annotation table; cluster ids are converted to str before the join.
T_Cells_Annotation_C21 = pd.read_csv('T_Cells_Res1.5_C21_Annotation.csv')
T_Cells_Annotation_C21['leiden_resolution_1.5_C21'] = T_Cells_Annotation_C21['leiden_resolution_1.5_C21'].map(str)

# Keep only clusters whose L2 and L3 labels are both flagged 'Yes'.
T_Cells_Annotation_C21 = T_Cells_Annotation_C21.query("AIFI_L2_Final=='Yes' & AIFI_L3_Final=='Yes'")

adata_C21 = sc.read_h5ad('/home/jupyter/reference_generating_new/T/Tcells_processed_LV2_C21_2023-11-27.h5ad')

# DataFrame.merge on a column ignores the left index and returns a fresh
# integer index; assigning that frame to .obs would replace the per-cell index.
# The original index is therefore put back after the merge.
# validate="many_to_one" guarantees one annotation row per cluster, so the
# merged frame stays row-for-row aligned with the cells.
adata_C21.obs = adata_C21.obs.merge(
    T_Cells_Annotation_C21,
    on=["leiden_resolution_1.5_C21"],
    how="left",
    validate="many_to_one",
).set_axis(adata_C21.obs.index, axis=0)

# Export the cells that belong to a finalised cluster.
df_Part_8 = adata_C21.obs[
    adata_C21.obs['leiden_resolution_1.5_C21'].isin(T_Cells_Annotation_C21['leiden_resolution_1.5_C21'])
]
df_Part_8.to_parquet("Annotation_Part_8.parquet")
# Reconciliation: the eight exported parts together should hold exactly as
# many rows as the full T-cell obs table.
np.shape(adata.obs)[0] == sum(
    len(part)
    for part in (
        df_Part_1,
        df_Part_2,
        df_Part_3,
        df_Part_4,
        df_Part_5,
        df_Part_6,
        df_Part_7,
        df_Part_8,
    )
)  # recorded result in the original file: True
import os
import pandas as pd

# Collect every CSV file in `csv_directory` into a single Excel workbook,
# one sheet per file, with the sheet named after the file (extension removed).
csv_directory = '.'
output_excel = 'combined.xlsx'

# A single context-managed writer: the workbook is saved and the file handle
# released when the block exits. (No second, never-closed writer on the same
# path.)
with pd.ExcelWriter(output_excel, engine='openpyxl') as writer:
    # os.listdir returns entries in arbitrary order; sort so the sheet order
    # is the same on every run.
    for csv_file in sorted(os.listdir(csv_directory)):
        if not csv_file.endswith('.csv'):
            continue
        file_path = os.path.join(csv_directory, csv_file)
        df = pd.read_csv(file_path)
        sheet_name = os.path.splitext(csv_file)[0]
        df.to_excel(writer, sheet_name=sheet_name, index=False)