suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(SeuratDisk))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(plyr)
library(purrr)
suppressPackageStartupMessages(library(H5weaver))
library(parallel)
------------------------------------------------------------------------------ You have loaded plyr after dplyr - this is likely to cause problems. If you need functions from both plyr and dplyr, please load plyr first, then dplyr: library(plyr); library(dplyr) ------------------------------------------------------------------------------ Attaching package: ‘plyr’ The following objects are masked from ‘package:dplyr’: arrange, count, desc, failwith, id, mutate, rename, summarise, summarize Attaching package: ‘purrr’ The following object is masked from ‘package:plyr’: compact
reference <- readRDS("/home//jupyter/pbmc_multimodal_2023.rds")
#add level 2.5 labels
l3 <- as.character(reference[[]]$celltype.l3)
l2 <- as.character(reference[[]]$celltype.l2)
l2.5 <- l2
l2.5[l3 == "Treg Naive"] <- "Treg Naive"
l2.5[l3 == "Treg Memory"] <- "Treg Memory"
l2.5[l3 %in% c("CD8 TEM_4", "CD8 TEM_5")] <- "CD8 TEMRA"
reference <- AddMetaData(reference, metadata = l2.5, col.name = "celltype.l2.5")
Loading required package: SeuratObject Loading required package: sp Attaching package: ‘SeuratObject’ The following object is masked from ‘package:base’: intersect Loading required package: Seurat
sort(rownames(reference@assays$ADT@data))
hise_meta<-read.csv("hise_meta_data_2023-11-19.csv")
get_last_pattern <- function(x) {
split_vector <- strsplit(x, "/")[[1]]
last_pattern <- tail(split_vector, 1)
return(last_pattern)
}
last_patterns <- unlist(lapply(hise_meta$file.name, get_last_pattern))
#hise_meta$file.path<-paste0("cache/",hise_meta$file.id,"/",last_patterns)
hise_meta <-hise_meta %>% arrange(file.batchID)
b <- seq(10, dim(hise_meta)[1]+8, 10)
df_chunk_list<-lapply(seq_along(b), function(i) hise_meta[(b-9)[i]:b[i], ])
length(df_chunk_list)
mclapply(df_chunk_list,function(x){
so_list <- lapply(x$file.path[!is.na(x$file.path)],function(i){
sce <- read_h5_sce(i)
counts <- assay(sce, "counts")
rownames(counts)<- make.names(rownames(counts), unique=TRUE)
so<-CreateSeuratObject(
counts,meta.data =data.frame(sce@colData),
assay = "RNA")
})
combined <- Reduce(merge, so_list)
rm(so_list)
combined <- SCTransform(combined,method = "glmGamPoi", verbose = FALSE)
#find anchors
anchors <- FindTransferAnchors(
reference = reference,
query = combined,
normalization.method = "SCT",
reference.reduction = "spca",
dims = 1:50
)
#perform projection to get labels
combined <- MapQuery(
anchorset = anchors,
query = combined,
reference = reference,
refdata = list(
celltype.l1 = "celltype.l1",
celltype.l2 = "celltype.l2",
celltype.l3 = "celltype.l3",
celltype.l2.5 = "celltype.l2.5",
predicted_ADT = "ADT"
),
reference.reduction = "spca",
reduction.model = "wnn.umap"
)
mclapply( unique(combined[[]]$pbmc_sample_id),function(i){
labels<-combined[[]]%>% filter(pbmc_sample_id==i)
write.csv(labels,paste0("Labels_20231119/",i,".csv"))
combined_sub<-subset(combined,subset=pbmc_sample_id==i)
mat<-combined_sub@assays$predicted_ADT@data
list_mat <- list(i = mat@i, p = mat@p, x = mat@x, Dim = dim(mat),rownames = rownames(mat),
colnames = colnames(mat))
h5createFile(paste0("Labels_20231119/",i,"_ADT.h5"))
h5write(list_mat, paste0("Labels_20231119/",i,"_ADT.h5"), "mat")
},mc.cores=n_distinct(combined[[]]$pbmc_sample_id))
rm(combined)
gc()
},mc.cores=3)