quiet_library <- function(...) {suppressPackageStartupMessages(library(...))} quiet_library(DESeq2) quiet_library(dplyr) quiet_library(hise) quiet_library(purrr) quiet_library(furrr) plan(multicore, workers = 12) if(!dir.exists("output")) { dir.create("output") } in_files <- c() out_files <- c() prepare_deseq_data <- function(mat, meta, design) { meta$cohort.cohortGuid <- factor(meta$cohort.cohortGuid, levels = c("BR1", "BR2")) meta$subject.biologicalSex <- factor(meta$subject.biologicalSex, levels = c("Female", "Male")) meta$subject.cmv <- factor(meta$subject.cmv, levels = c("Negative", "Positive")) mat <- mat[,meta$barcodes] mat <- as.matrix(mat) mode(mat) <- "integer" DESeqDataSetFromMatrix( mat, colData = meta, design = design ) } format_deseq_results <- function(dds, cell_type, group_by, fg, bg) { meta <- as.data.frame(colData(dds)) res <- results(dds, contrast = c(group_by, fg, bg)) res <- data.frame(res) res <- res %>% arrange(padj) %>% mutate(direction = ifelse(log2FoldChange > 0, paste0("HigherIn", fg), paste0("HigherIn", bg))) %>% mutate(gene = rownames(.), cell_type = cell_type, fg = fg, n_fg = sum(res[[group_by]] == fg), bg = bg, n_bg = sum(res[[group_by]] == bg)) %>% select(cell_type, fg, n_fg, bg, n_bg, gene, log2FoldChange, padj, direction, baseMean, lfcSE, stat, pvalue) res } all_uuid <- "edf436fb-7063-4655-9055-6ad431fe7159" res <- cacheFiles(list(all_uuid)) pb_file <- list.files(paste0("cache/", all_uuid), pattern = ".rds", full.names = TRUE) pb_data <- readRDS(pb_file) in_files = c(in_files, all_uuid) dds_data <- map2( pb_data$mats, pb_data$meta, prepare_deseq_data, design = ~ cohort.cohortGuid + subject.biologicalSex + subject.cmv ) deseq_res <- future_map( dds_data, DESeq, parallel = FALSE, # parallelization handled by future_map quiet = TRUE, .options = furrr_options(seed = 3030L) ) cohort_res <- map2_dfr( deseq_res, names(deseq_res), format_deseq_results, group_by = "cohort.cohortGuid", fg = "BR1", bg = "BR2" ) out_group <- "cohort" out_csv <- paste0( "output/diha_pbmc_", out_group, "_pseudobulk_deseq2_", Sys.Date(), ".csv" ) write.csv( cohort_res, out_csv, quote = FALSE, row.names = FALSE ) out_files <- c(out_files, out_csv) dds_data <- map2( pb_data$mats, pb_data$meta, prepare_deseq_data, design = ~ subject.cmv + cohort.cohortGuid + subject.biologicalSex ) deseq_res <- future_map( dds_data, DESeq, parallel = FALSE, # parallelization handled by future_map quiet = TRUE, .options = furrr_options(seed = 3030L) ) cmv_res <- map2_dfr( deseq_res, names(deseq_res), format_deseq_results, group_by = "subject.cmv", fg = "Positive", bg = "Negative" ) out_group <- "cmv" out_csv <- paste0( "output/diha_pbmc_", out_group, "_pseudobulk_deseq2_", Sys.Date(), ".csv" ) write.csv( cmv_res, out_csv, quote = FALSE, row.names = FALSE ) out_files <- c(out_files, out_csv) dds_data <- map2( pb_data$mats, pb_data$meta, prepare_deseq_data, design = ~ subject.biologicalSex + cohort.cohortGuid + subject.cmv ) deseq_res <- future_map( dds_data, DESeq, parallel = FALSE, # parallelization handled by future_map quiet = TRUE, .options = furrr_options(seed = 3030L) ) sex_res <- map2_dfr( deseq_res, names(deseq_res), format_deseq_results, group_by = "subject.biologicalSex", fg = "Female", bg = "Male" ) out_group <- "sex" out_csv <- paste0( "output/diha_pbmc_", out_group, "_pseudobulk_deseq2_", Sys.Date(), ".csv" ) write.csv( sex_res, out_csv, quote = FALSE, row.names = FALSE ) out_files <- c(out_files, out_csv) cmv_pos_uuid <- "c8e65e37-d11d-4baf-b450-6eda124ef102" res <- cacheFiles(list(cmv_pos_uuid)) pb_file <- list.files(paste0("cache/", cmv_pos_uuid), pattern = ".rds", full.names = TRUE) pb_data <- readRDS(pb_file) in_files = c(in_files, cmv_pos_uuid) table(pb_data$meta[[2]]$subject.cmv) ncol(pb_data$mats[[2]]) dds_data <- map2( pb_data$mats, pb_data$meta, prepare_deseq_data, design = ~ cohort.cohortGuid + subject.biologicalSex ) deseq_res <- future_map( dds_data, DESeq, parallel = FALSE, # parallelization handled by future_map quiet = TRUE, .options = furrr_options(seed = 3030L) ) cmvpos_cohort_res <- map2_dfr( deseq_res, names(deseq_res), format_deseq_results, group_by = "cohort.cohortGuid", fg = "BR1", bg = "BR2" ) out_group <- "cmvpos_cohort" out_csv <- paste0( "output/diha_pbmc_", out_group, "_pseudobulk_deseq2_", Sys.Date(), ".csv" ) write.csv( cmvpos_cohort_res, out_csv, quote = FALSE, row.names = FALSE ) out_files <- c(out_files, out_csv) cmv_neg_uuid <- "42de2197-4ce6-400e-a0e7-212f2b25e896" res <- cacheFiles(list(cmv_neg_uuid)) pb_file <- list.files(paste0("cache/", cmv_neg_uuid), pattern = ".rds", full.names = TRUE) pb_data <- readRDS(pb_file) in_files <- c(in_files, cmv_neg_uuid) dds_data <- map2( pb_data$mats, pb_data$meta, prepare_deseq_data, design = ~ cohort.cohortGuid + subject.biologicalSex ) deseq_res <- future_map( dds_data, DESeq, parallel = FALSE, # parallelization handled by future_map quiet = TRUE, .options = furrr_options(seed = 3030L) ) cmvneg_cohort_res <- map2_dfr( deseq_res, names(deseq_res), format_deseq_results, group_by = "cohort.cohortGuid", fg = "BR1", bg = "BR2" ) out_group <- "cmvneg_cohort" out_csv <- paste0( "output/diha_pbmc_", out_group, "_pseudobulk_deseq2_", Sys.Date(), ".csv" ) write.csv( cmvneg_cohort_res, out_csv, quote = FALSE, row.names = FALSE ) out_files <- c(out_files, out_csv) female_uuid <- "56f09312-eb3f-40b7-b840-368d1ddbc22b" res <- cacheFiles(list(female_uuid)) pb_file <- list.files(paste0("cache/", female_uuid), pattern = ".rds", full.names = TRUE) pb_data <- readRDS(pb_file) in_files = c(in_files, female_uuid) dds_data <- map2( pb_data$mats, pb_data$meta, prepare_deseq_data, design = ~ cohort.cohortGuid + subject.cmv ) deseq_res <- future_map( dds_data, DESeq, parallel = FALSE, # parallelization handled by future_map quiet = TRUE, .options = furrr_options(seed = 3030L) ) female_cohort_res <- map2_dfr( deseq_res, names(deseq_res), format_deseq_results, group_by = "cohort.cohortGuid", fg = "BR1", bg = "BR2" ) out_group <- "female_cohort" out_csv <- paste0( "output/diha_pbmc_", out_group, "_pseudobulk_deseq2_", Sys.Date(), ".csv" ) write.csv( female_cohort_res, out_csv, quote = FALSE, row.names = FALSE ) out_files <- c(out_files, out_csv) male_uuid <- "4bf2bab1-b3c8-456b-aa90-566cd7b03577" res <- cacheFiles(list(male_uuid)) pb_file <- list.files(paste0("cache/", male_uuid), pattern = ".rds", full.names = TRUE) pb_data <- readRDS(pb_file) in_files <- c(in_files, male_uuid) dds_data <- map2( pb_data$mats, pb_data$meta, prepare_deseq_data, design = ~ cohort.cohortGuid + subject.cmv ) deseq_res <- future_map( dds_data, DESeq, parallel = FALSE, # parallelization handled by future_map quiet = TRUE, .options = furrr_options(seed = 3030L) ) male_cohort_res <- map2_dfr( deseq_res, names(deseq_res), format_deseq_results, group_by = "cohort.cohortGuid", fg = "BR1", bg = "BR2" ) out_group <- "male_cohort" out_csv <- paste0( "output/diha_pbmc_", out_group, "_pseudobulk_deseq2_", Sys.Date(), ".csv" ) write.csv( male_cohort_res, out_csv, quote = FALSE, row.names = FALSE ) out_files <- c(out_files, out_csv) study_space_uuid <- "64097865-486d-43b3-8f94-74994e0a72e0" title <- paste("PBMC Ref. Seurat Pseudobulk DESeq2 Res.", Sys.Date()) in_list <- as.list(in_files) in_list out_list <- as.list(out_files) out_list uploadFiles( files = out_list, studySpaceId = study_space_uuid, title = title, inputFileIds = in_list, store = "project", destination = "pseudobulk_deseq2", doPrompt = FALSE ) sessionInfo()