import celltypist
from celltypist import models
import scanpy as sc
import pandas as pd
import numpy as np
import anndata
import re
import h5py
import scipy.sparse as scs
import concurrent.futures
def read_mat(h5_con):
    """Assemble the sparse matrix stored under ``h5_con['matrix']``.

    The ``data``, ``indices`` and ``indptr`` entries are read in full and used
    as the three arrays of a compressed-sparse-column matrix; the ``shape``
    entry supplies its dimensions.

    Args:
        h5_con: Open h5 handle (or any nested mapping) exposing a ``'matrix'``
            group with ``data``, ``indices``, ``indptr`` and ``shape`` entries.

    Returns:
        A ``scipy.sparse.csc_matrix`` built from those arrays.
    """
    counts = h5_con['matrix']['data'][:]          # non-zero values
    row_indices = h5_con['matrix']['indices'][:]  # row index of each value
    col_pointers = h5_con['matrix']['indptr'][:]  # column start offsets
    dims = tuple(h5_con['matrix']['shape'][:])    # matrix dimensions
    return scs.csc_matrix((counts, row_indices, col_pointers), shape=dims)
# define a function to read the observations (i.e. cell metadata)
def read_obs(h5con):
    """Build the observation (metadata) table from an open h5 handle.

    The table starts with a ``barcodes`` column decoded from
    ``h5con['matrix']['barcodes']`` and gains one column per entry found under
    ``h5con['matrix']['observations']``.

    Args:
        h5con: Open h5 handle (or any nested mapping) exposing
            ``['matrix']['barcodes']`` and ``['matrix']['observations']``.

    Returns:
        A ``pandas.DataFrame`` with a ``barcodes`` column followed by one
        column per observation entry. Empty inputs yield an empty frame that
        still carries every column.
    """
    bc = h5con['matrix']['barcodes'][:]
    bc = [x.decode('UTF-8') for x in bc]
    # Initialize the DataFrame with cell barcodes
    obs_df = pd.DataFrame({'barcodes': bc})
    # Get the list of available metadata columns
    obs_columns = h5con['matrix']['observations'].keys()
    for col in obs_columns:
        # Read the values
        values = h5con['matrix']['observations'][col][:]
        # Byte storage is detected from the first element only. The length
        # guard keeps an empty column from raising IndexError on values[0].
        if len(values) > 0 and isinstance(values[0], (bytes, bytearray)):
            # Decode byte strings
            values = [x.decode('UTF-8') for x in values]
        # Add column to the DataFrame
        obs_df[col] = values
    return obs_df
# define a function to construct anndata object from a h5 file
def read_h5_anndata(h5_file):
    """Construct an AnnData object from a single h5 file.

    Args:
        h5_file: Path of the h5 file; it is opened read-only.

    Returns:
        An ``anndata.AnnData`` whose data is the transpose of the matrix
        returned by ``read_mat``, whose ``obs`` is the table returned by
        ``read_obs``, and whose ``var_names`` are the decoded entries of
        ``matrix/features/name`` (made unique).
    """
    # Every dataset is read into memory inside the with-block, so the file
    # handle is released as soon as reading finishes, even if a read raises.
    with h5py.File(h5_file, mode='r') as h5_con:
        # extract the expression matrix
        mat = read_mat(h5_con)
        # extract gene names
        genes = h5_con['matrix']['features']['name'][:]
        genes = [x.decode('UTF-8') for x in genes]
        # extract metadata
        obs_df = read_obs(h5_con)
    # construct anndata; the matrix is transposed so its rows line up with
    # the observation table and its columns with the gene names
    adata = anndata.AnnData(mat.T, obs=obs_df)
    # make sure the gene names are aligned and unique
    adata.var_names = genes
    adata.var_names_make_unique()
    return adata
# Load the sample manifest; its 'file.path' column lists the h5 files to read.
meta_data = pd.read_csv("hise_meta_data_2023-11-19.csv")
# Read each listed file into its own AnnData object, in manifest order.
results = [read_h5_anndata(file_name) for file_name in meta_data['file.path']]
# Combine the per-file objects into one and save it to disk.
adata = anndata.concat(results)
adata.write_h5ad('adata_all_raw.h5ad')
# Captured console output (not code):
# ... storing 'batch_id' as categorical
# ... storing 'cell_name' as categorical
# ... storing 'chip_id' as categorical
# ... storing 'hto_barcode' as categorical
# ... storing 'hto_category' as categorical
# ... storing 'original_barcodes' as categorical
# ... storing 'pbmc_sample_id' as categorical
# ... storing 'pool_id' as categorical
# ... storing 'seurat_pbmc_type' as categorical
# ... storing 'well_id' as categorical