suppressPackageStartupMessages(library(readxl))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(stringr))
library(hise)
library(plyr)
library(purrr)
#' Flatten HISE file descriptors into one data frame (column-filling variant)
#'
#' Takes the first element of `descriptors` as the list of per-file descriptor
#' records, flattens each record with `unlist()`, removes unwanted fields, and
#' stacks the resulting one-row data frames with `plyr::rbind.fill()`, so
#' records that do not share the same fields are padded with `NA` instead of
#' failing.
#'
#' @param descriptors A list whose first element is a list of descriptor
#'   records (presumably the value returned by `getFileDescriptors()` --
#'   confirm against the hise version in use).
#' @param keep_labs Logical. When `TRUE`, fields whose flattened name starts
#'   with `lab` are kept; otherwise (the default) they are dropped.
#' @return A data frame with one row per descriptor record.
fileDescToDataframe_beta <- function(descriptors, keep_labs = FALSE) {
  descriptors <- descriptors[[1]]
  assertthat::assert_that(typeof(descriptors) == "list")
  assertthat::assert_that(typeof(keep_labs) == "logical")

  # Field-name prefixes removed from every record. Lab fields join the list
  # unless the caller explicitly asked to keep them.
  dropped_prefixes <- c(
    "specimens", "emr", "survey", "revision", "file\\.userTags"
  )
  if (!isTRUE(keep_labs)) {
    dropped_prefixes <- c(dropped_prefixes, "lab")
  }
  drop_pattern <- paste0("^(", paste(dropped_prefixes, collapse = "|"), ")")

  flatten_one <- function(desc) {
    desc <- unlist(desc)
    # "scheme" entries are removed wherever the word appears in the name.
    desc <- desc[!grepl("scheme", names(desc))]
    # Strip the literal "descriptors." prefix that unlist() adds for nested
    # records, so the prefix filters below see the bare field names.
    names(desc) <- sub("^descriptors\\.", "", names(desc))
    desc <- desc[!grepl(drop_pattern, names(desc))]
    as.data.frame(as.list(desc))
  }

  # Namespace-qualified so the function does not depend on plyr being attached.
  do.call(plyr::rbind.fill, lapply(descriptors, flatten_one))
}
#' Flatten HISE file descriptors into one data frame
#'
#' Flattens each descriptor record with `unlist()`, removes unwanted fields,
#' and stacks the resulting one-row data frames with base `rbind()`. Because
#' base `rbind()` is used, every record must yield the same set of fields;
#' records with differing fields make `rbind()` fail.
#'
#' @param descriptors A list of descriptor records (each itself a list).
#' @param keep_labs Logical. When `TRUE`, fields whose flattened name starts
#'   with `lab` are kept; otherwise (the default) they are dropped.
#' @return A data frame with one row per descriptor record, or `NULL` when
#'   `descriptors` is empty.
fileDescToDataframe <- function(descriptors,
                                keep_labs = FALSE) {
  assertthat::assert_that(typeof(descriptors) == "list")
  assertthat::assert_that(typeof(keep_labs) == "logical")

  # Field-name prefixes removed from every record. Lab fields join the list
  # unless the caller explicitly asked to keep them.
  dropped_prefixes <- c(
    "specimens", "emr", "survey", "revision", "file\\.userTags"
  )
  if (!isTRUE(keep_labs)) {
    dropped_prefixes <- c(dropped_prefixes, "lab")
  }
  drop_pattern <- paste0("^(", paste(dropped_prefixes, collapse = "|"), ")")

  flatten_one <- function(desc) {
    desc <- unlist(desc)
    # "scheme" entries are removed wherever the word appears in the name.
    desc <- desc[!grepl("scheme", names(desc))]
    # Strip the literal "descriptors." prefix that unlist() adds for nested
    # records, so the prefix filters below see the bare field names.
    names(desc) <- sub("^descriptors\\.", "", names(desc))
    desc <- desc[!grepl(drop_pattern, names(desc))]
    as.data.frame(as.list(desc))
  }

  do.call(rbind, lapply(descriptors, flatten_one))
}
# ---- Fetch labeled scRNA-seq file descriptors, one query per cohort ----
BR1_rna_desc <- getFileDescriptors(
  fileType = "scRNA-seq-labeled",
  filter = list(cohort.cohortGuid = "BR1")
)
BR2_rna_desc <- getFileDescriptors(
  fileType = "scRNA-seq-labeled",
  filter = list(cohort.cohortGuid = "BR2")
)
UP1_rna_desc <- getFileDescriptors(
  fileType = "scRNA-seq-labeled",
  filter = list(cohort.cohortGuid = "UP1")
)

# Flatten each query result into a data frame (one row per file).
BR1_rna_desc <- fileDescToDataframe_beta(BR1_rna_desc)
BR2_rna_desc <- fileDescToDataframe_beta(BR2_rna_desc)
UP1_rna_desc <- fileDescToDataframe_beta(UP1_rna_desc)

# ---- Build the BR1/BR2 sample table ----
# NOTE(review): plyr is attached after dplyr at the top of this file, so the
# unqualified `arrange()` / `mutate()` calls below resolve to whichever package
# was attached last. They are left unqualified on purpose so the sort order
# stays exactly as it was -- confirm before namespacing them.
meta_data <- rbind.fill(BR1_rna_desc, BR2_rna_desc)

# Drop batch B004 and three excluded subjects.
meta_data <- meta_data %>%
  filter(!file.batchID == "B004") %>%
  filter(!subject.subjectGuid %in% c("BR2007", "BR2049", "BR1034"))

# Derive the PBMC sample id from the file name: take the 8 characters that
# follow the last "PB0" in the name, put the "PB0" prefix back, and strip
# underscores.
meta_data$pbmc_sample_id <- gsub(
  "_", "",
  paste0("PB0", substr(sub(".*PB0", "", meta_data$file.name), 1, 8))
)

# Drop batches whose id contains "EXP", then order by sample id so that the
# de-duplication below keeps the last row in that order for each kit.
meta_data <- meta_data %>%
  filter(!grepl("EXP", file.batchID)) %>%
  arrange(pbmc_sample_id)

# Keep one row per sample kit (the last occurrence), then order by kit id.
meta_data <- meta_data[
  !duplicated(meta_data[["sample.sampleKitGuid"]], fromLast = TRUE),
] %>%
  arrange(sample.sampleKitGuid)

# For visits recorded as "Other - Non-Flu", use the visit details as the
# visit name.
meta_data <- meta_data %>%
  mutate(
    sample.visitName = ifelse(
      sample.visitName == "Other - Non-Flu",
      sample.visitDetails,
      sample.visitName
    )
  )

# Restrict to the "Flu Year 1 Day 0" visit and report rows per cohort.
meta_data <- meta_data %>% filter(sample.visitName == "Flu Year 1 Day 0")
table(meta_data$cohort.cohortGuid)
# Output of the table() call above:
#   BR1 BR2
#    47  45
# ---- Add the selected UP1 samples ----
# Sample id = sample kit id with "KT" removed and "PB" prepended.
UP1_rna_desc$Sample_ID <- paste0(
  "PB", gsub("KT", "", UP1_rna_desc$sample.sampleKitGuid)
)

# UP1 samples to include.
UP_Sample_ID <- c(
  "PB00809", "PB00811", "PB00193", "PB00841",
  "PB00842", "PB00833", "PB00910", "PB00884",
  "PB00892", "PB00914", "PB00913", "PB00927",
  "PB00928", "PB02391", "PB02392", "PB03223"
)
UP1_rna_desc <- UP1_rna_desc %>% filter(Sample_ID %in% UP_Sample_ID)
meta_data <- rbind.fill(meta_data, UP1_rna_desc)

# ---- Download every selected file into the local cache ----
meta_data_scRNA_list <- list()

# A single failed request used to abort the loop and leave every remaining
# file undownloaded. Each file is now attempted up to `max_attempts` times,
# the loop carries on past files that still fail, and one error listing the
# failed ids is raised once all files have been tried.
max_attempts <- 3L
failed_file_ids <- character(0)

# seq_len() rather than 1:n so a zero-row table performs no iterations.
for (i in seq_len(nrow(meta_data))) {
  file_id <- meta_data$file.id[i]
  download_ok <- FALSE

  for (attempt in seq_len(max_attempts)) {
    download_ok <- tryCatch(
      {
        meta_data_scRNA_single <- cacheFiles(list(file_id))
        TRUE
      },
      error = function(e) {
        message(
          "cacheFiles failed for file ", file_id,
          " (attempt ", attempt, " of ", max_attempts, "): ",
          conditionMessage(e)
        )
        FALSE
      }
    )
    if (download_ok) {
      break
    }
    if (attempt < max_attempts) {
      # Short, growing pause before the next attempt.
      Sys.sleep(2 * attempt)
    }
  }

  if (!download_ok) {
    failed_file_ids <- c(failed_file_ids, as.character(file_id))
  }
  print(i)
}

if (length(failed_file_ids) > 0L) {
  stop(
    "Could not cache ", length(failed_file_ids), " file(s): ",
    paste(failed_file_ids, collapse = ", "),
    call. = FALSE
  )
}
# Output of the caching loop above (the run aborted while fetching the fifth
# file):
#   submitting request as query ID first... retrieving files using fileIDS...
#   [1] 1
#   submitting request as query ID first... retrieving files using fileIDS...
#   [1] 2
#   submitting request as query ID first... retrieving files using fileIDS...
#   [1] 3
#   submitting request as query ID first... retrieving files using fileIDS...
#   [1] 4
#   submitting request as query ID first... retrieving files using fileIDS...
#   Error in curl::curl_fetch_memory(url, handle = handle):
#     Operation was aborted by an application callback
#   Traceback:
#     1. cacheFiles(list(meta_data$file.id[i]))
#     2. doQuery(assembleQuery(id = ids_expanded, format = "all",
#            endpoint = hydraEnvVar("searchExecEndpoint")))
#     3. httr::GET(url, do.call(httr::add_headers, h))
#     4. request_perform(req, hu$handle$handle)
#     5. request_fetch(req$output, req$url, handle)
#     6. request_fetch.write_memory(req$output, req$url, handle)
#     7. curl::curl_fetch_memory(url, handle = handle)
# Checkpoint the sample table before the cached-file paths are attached. The
# write at the end of this section uses the same file name, so on the same
# day it replaces this checkpoint.
write.csv(meta_data, paste0("hise_meta_data_", Sys.Date(), ".csv"))

# ---- Index the files present in the local cache ----
# Paths are relative to cache/ and searched recursively; only files whose name
# matches the regular expression "h5" are listed.
files <- data.frame(
  file.path = list.files(
    path = "cache/", pattern = "h5", all.files = TRUE,
    full.names = FALSE, recursive = TRUE,
    ignore.case = FALSE, include.dirs = FALSE, no.. = FALSE
  ),
  stringsAsFactors = FALSE
)
if (nrow(files) == 0L) {
  warning("No cached h5 files found under cache/", call. = FALSE)
}

# Split each relative path on "/". The first component is taken as the file
# id and the last as the downloaded file name. Picking components by position
# gives well-formed columns for an empty cache or for paths nested deeper
# than two levels, where rbind-ing the raw splits would recycle values.
separated <- strsplit(files$file.path, split = "/")
df <- data.frame(
  file.id = vapply(separated, function(parts) parts[1L], character(1)),
  file.name.downloaded = vapply(
    separated, function(parts) parts[length(parts)], character(1)
  ),
  stringsAsFactors = FALSE
)

# Attach the downloaded file name and relative cache path to each sample row.
meta_data <- left_join(meta_data, cbind(df, files), by = "file.id")

# Turn the relative path into an absolute one. Rows without a cached file keep
# a missing path instead of receiving the bogus string ".../cache/NA".
cache_root <- "/home//jupyter/reference_generating_new/cache/"
meta_data$file.path <- ifelse(
  is.na(meta_data$file.path),
  NA_character_,
  paste0(cache_root, meta_data$file.path)
)

write.csv(meta_data, paste0("hise_meta_data_", Sys.Date(), ".csv"))