# Label PBMC scRNA-seq samples against the Hao et al. CITE-seq PBMC
# reference using Seurat label transfer (SCT + sPCA anchors), write
# per-sample label CSVs and projected-ADT HDF5 files, then assemble the
# combined outputs and upload them to HISE.

quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(dplyr)
quiet_library(H5weaver)
quiet_library(hise)
quiet_library(parallel)
quiet_library(purrr)
quiet_library(Seurat)
quiet_library(SeuratObject)

# The reference download is large; the default 60 s timeout is too short.
options(timeout = 10000)

# Reference ----
if (!dir.exists("reference")) {
  dir.create("reference")
}
download.file(
  "https://zenodo.org/records/7779017/files/pbmc_multimodal_2023.rds?download=1",
  "reference/pbmc_multimodal_2023.rds"
)
reference <- readRDS("reference/pbmc_multimodal_2023.rds")

# Build an intermediate "celltype.l2.5" label set: start from celltype.l2,
# then promote selected l3 labels (Treg Naive/Memory; CD8 TEM_4/5 -> TEMRA).
l3 <- as.character(reference@meta.data$celltype.l3)
l2 <- as.character(reference@meta.data$celltype.l2)
l2.5 <- l2
l2.5[l3 == "Treg Naive"] <- "Treg Naive"
l2.5[l3 == "Treg Memory"] <- "Treg Memory"
l2.5[l3 %in% c("CD8 TEM_4", "CD8 TEM_5")] <- "CD8 TEMRA"
reference <- AddMetaData(reference, metadata = l2.5, col.name = "celltype.l2.5")

# Sample metadata ----
sample_meta_uuid <- "2da66a1a-17cc-498b-9129-6858cf639caf"
res <- cacheFiles(list(sample_meta_uuid))
sample_meta_file <- list.files(
  paste0("cache/", sample_meta_uuid),
  pattern = "\\.csv$",  # anchored: only true .csv files
  full.names = TRUE
)
hise_meta <- read.csv(sample_meta_file)

# Cache a HISE file by UUID unless it is already present locally.
cache_file <- function(h5_uuid) {
  cache_dir <- paste0("cache/", h5_uuid)
  if (!dir.exists(cache_dir)) {
    res <- cacheFiles(list(h5_uuid))
  }
  invisible(NULL)
}

# Walk file UUIDs to cache (one pass; cache_file skips existing dirs).
file_ids <- hise_meta$file.id
walk(file_ids, cache_file)

# Chunk samples for parallel labeling ----
hise_meta <- hise_meta %>%
  arrange(file.batchID)

# Chunks of up to 10 samples each. The chunk count is derived from the
# data instead of being hard-coded, so > 110 samples no longer produces
# NA chunk ids (which would silently drop samples in split()).
chunk_size <- 10
n_chunks <- ceiling(nrow(hise_meta) / chunk_size)
b <- rep(seq_len(n_chunks), each = chunk_size)[seq_len(nrow(hise_meta))]
df_chunk_list <- split(hise_meta, b)
map_int(df_chunk_list, nrow)

out_dir <- "output/Hao_PBMC/"
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

# Read one cached HDF5 file into a Seurat object.
# Row names are made unique because reference mapping requires
# non-duplicated feature names.
read_so <- function(h5_uuid) {
  cache_dir <- paste0("cache/", h5_uuid)
  h5_file <- list.files(
    cache_dir,
    pattern = "\\.h5$",
    full.names = TRUE
  )
  counts <- read_h5_dgCMatrix(h5_file)
  rownames(counts) <- make.unique(rownames(counts))
  meta <- read_h5_cell_meta(h5_file)
  rownames(meta) <- meta$barcodes
  so <- CreateSeuratObject(
    counts,
    meta.data = meta,
    assay = "RNA"
  )
  so
}

# Write one sample's predicted labels (cell metadata) to CSV.
write_labels <- function(sample_labels, sample_id) {
  out_file <- paste0("output/Hao_PBMC/", sample_id, "_Hao_PBMC.csv")
  write.csv(
    sample_labels,
    out_file,
    row.names = FALSE,
    quote = FALSE
  )
}

# Subset the projected ADT matrix to one sample's barcodes.
get_sample_adt <- function(sample_meta, adt_data) {
  adt_data[, sample_meta$barcodes]
}

# Write one sample's sparse ADT matrix to HDF5 by storing the
# dgCMatrix slots (i, p, x, Dim, dimnames) under "/mat".
write_adt <- function(sample_adt, sample_id) {
  out_file <- paste0("output/Hao_PBMC/", sample_id, "_ADT.h5")
  list_mat <- list(
    i = sample_adt@i,
    p = sample_adt@p,
    x = sample_adt@x,
    Dim = dim(sample_adt),
    rownames = rownames(sample_adt),
    colnames = colnames(sample_adt)
  )
  h5createFile(out_file)
  h5write(list_mat, out_file, "mat")
}

# TRUE if the label CSV for this sample has already been written.
check_output <- function(sample_id) {
  out_file <- paste0("output/Hao_PBMC/", sample_id, "_Hao_PBMC.csv")
  file.exists(out_file)
}

# Label one chunk of samples: merge them, SCTransform to match the
# reference, find transfer anchors, project labels and ADT with
# MapQuery, then write per-sample outputs. Chunks whose outputs all
# exist are skipped, making reruns resumable.
label_chunk <- function(meta_data) {
  # Check for existing labels
  sample_ids <- unique(meta_data$pbmc_sample_id)
  out_check <- map_lgl(sample_ids, check_output)
  if (sum(out_check) == length(sample_ids)) {
    return(invisible(NULL))
  }

  # Read and combine individual samples
  so_list <- map(meta_data$file.id, read_so)
  combined <- Reduce(merge, so_list)
  rm(so_list)

  # Transform data to match the reference
  combined <- SCTransform(
    combined,
    method = "glmGamPoi",
    verbose = FALSE
  )

  # Find anchors against the reference sPCA space
  anchors <- FindTransferAnchors(
    reference = reference,
    query = combined,
    normalization.method = "SCT",
    reference.reduction = "spca",
    dims = 1:50
  )

  # Perform projection to get labels and imputed ADT
  combined <- MapQuery(
    anchorset = anchors,
    query = combined,
    reference = reference,
    refdata = list(
      celltype.l1 = "celltype.l1",
      celltype.l2 = "celltype.l2",
      celltype.l3 = "celltype.l3",
      celltype.l2.5 = "celltype.l2.5",
      predicted_ADT = "ADT"
    ),
    reference.reduction = "spca",
    reduction.model = "wnn.umap"
  )

  # Split the metadata by sample for output
  sample_meta_list <- split(
    combined@meta.data,
    combined@meta.data$pbmc_sample_id
  )

  # Write labels
  walk2(
    sample_meta_list,
    names(sample_meta_list),
    write_labels
  )

  # Subset and write projected ADT data
  sample_adt_list <- map(
    sample_meta_list,
    get_sample_adt,
    adt_data = combined@assays$predicted_ADT@data
  )
  walk2(
    sample_adt_list,
    names(sample_meta_list),
    write_adt
  )
}

# Run label transfer over chunks in parallel.
res <- mclapply(
  df_chunk_list,
  label_chunk,
  mc.cores = 3
)

# Assemble outputs ----
label_files <- list.files(
  out_dir,
  pattern = "\\.csv$",
  full.names = TRUE
)
all_labels <- map_dfr(label_files, read.csv)
all_labels <- all_labels %>%
  select(pbmc_sample_id, barcodes, starts_with("predicted"))

out_csv <- paste0(
  "output/ref_seurat_labels_PBMC_",
  Sys.Date(),
  ".csv"
)
write.csv(
  all_labels,
  out_csv,
  row.names = FALSE,
  quote = FALSE
)

adt_files <- list.files(
  out_dir,
  pattern = "\\.h5$",
  full.names = TRUE
)

# Read a sparse ADT matrix written by write_adt() back into a dgCMatrix.
# index1 = FALSE because the stored i/p slots are 0-based.
read_adt_mat <- function(adt_file) {
  mat_list <- rhdf5::h5read(adt_file, "/mat")
  mat_list <- lapply(mat_list, as.vector)
  mat <- Matrix::sparseMatrix(
    i = mat_list$i,
    p = mat_list$p,
    x = mat_list$x,
    dims = mat_list$Dim,
    dimnames = list(
      mat_list$rownames,
      mat_list$colnames
    ),
    index1 = FALSE
  )
  mat
}

adt_list <- map(adt_files, read_adt_mat)
all_adt <- do.call(cbind, adt_list)

h5_list <- list(
  matrix_dgCMatrix = all_adt
)
h5_list <- h5_list_convert_from_dgCMatrix(h5_list)
str(h5_list)

out_h5 <- paste0(
  "output/ref_seurat_projected-ADT_PBMC_",
  Sys.Date(),
  ".h5"
)
write_h5_list(
  h5_list,
  out_h5
)

# Upload to HISE ----
study_space_uuid <- "64097865-486d-43b3-8f94-74994e0a72e0"
title <- paste("Ref. Seurat Label Predictions", Sys.Date())
in_list <- as.list(c(sample_meta_uuid, hise_meta$file.id))
out_list <- as.list(c(out_csv, out_h5))
uploadFiles(
  files = out_list,
  studySpaceId = study_space_uuid,
  title = title,
  inputFileIds = in_list,
  store = "project",
  doPrompt = FALSE
)

sessionInfo()