library(DIscBIO) Gclusters <- read.csv(file = "Granatum Clusters.csv", sep = ",",header=F) head(Gclusters) Gexpression <- read.csv(file = "Granatum expression.csv", sep = ",",header=F) rownames(Gexpression)<-Gexpression[,1] Gexpression<-Gexpression[-1,-1] head(Gexpression) Gdegs <- read.csv(file = "Granatum DEGs.csv", sep = ",",header=F) head(Gdegs) G_DEGs<-split(Gdegs,Gdegs[,1]) FileName<-"CTCdataset" # Name of the dataset #CSV=TRUE # If the dataset has ".csv", the user shoud set CSV to TRUE CSV=FALSE # If the dataset has ".rda", the user shoud set CSV to FALSE if (CSV==TRUE){ DataSet <- read.csv(file = paste0(FileName,".csv"), sep = ",",header=T) rownames(DataSet)<-DataSet[,1] DataSet<-DataSet[,-1] } else{ load(paste0(FileName,".rda")) DataSet<-get(FileName) } cat(paste0("The ", FileName," contains:","\n","Genes: ",length(DataSet[,1]),"\n","cells: ",length(DataSet[1,]),"\n")) sc<- DISCBIO(DataSet) # The DISCBIO class is the central object storing all information generated throughout the pipeline load("distances.Rdata") sc@distances<-distances # Estimating a value for the "minexpr" parameter S<-summary(rowMeans(DataSet,na.rm=TRUE)) # It gives an idea about the overall expression of the genes minexpr= S[3] # Estimating a value for the "minnumber" parameters minnumber= round(length(DataSet[1,])/10) # To be expressed in at 10% of the cells. sc<-Normalizedata(sc, mintotal=1, minexpr=minexpr, minnumber=minnumber, maxexpr=Inf, downsample=FALSE, dsn=1, rseed=17000) sc<-FinalPreprocessing(sc,GeneFlitering="ExpF",export = TRUE) # The GeneFiltering should be set to "ExpF" SharedGenes<-cbind(rownames(sc@fdata),rownames(Gexpression)) colnames(SharedGenes)<-c("DIscBIO","Granatum") gData<-rownames(Gexpression) dData<-rownames(sc@fdata) idx_genes <- is.element(gData,dData) SharedGenes<-gData[idx_genes] cat(paste0("There are ", length(SharedGenes)," shared genes between DIscBIO and Granatum.","\n")) set.seed(222) Km<-stats::kmeans(as.matrix(t(sc@fdata)),4) sc@kmeans$kpart<-Km[[1]] sc@cpart=Km[[1]] withr::with_options(repr.plot.width=12, repr.plot.height=25) plotSilhouette(sc,K=4) withr::with_options(repr.plot.width=10, repr.plot.height=12) Jaccard(sc,Clustering="K-means", K=4, plot = TRUE) # Jaccard f<-c(0.43, 0.348, 0.639, 0.576) cat(paste0("The mean Jaccard of all the clusters is ", round(mean(f), digits=1),"\n")) SharedCells<- data.frame(sc@kmeans$kpart,Gclusters[,3]) same<-c() for (i in 1: length(SharedCells[,1])){ if (SharedCells[i,1]=="1" & SharedCells[i,2]=="2"){ same<-c(same,i) } if (SharedCells[i,1]=="2" & SharedCells[i,2]=="3"){ same<-c(same,i) } if (SharedCells[i,1]=="3" & SharedCells[i,2]=="4"){ same<-c(same,i) } if (SharedCells[i,1]=="4" & SharedCells[i,2]=="1"){ same<-c(same,i) } } cat(paste0("There are ", length(same),"\n")) cat(paste0(round((length(same)/length(SharedCells[,1])) * 100,digits=1),"%", " of the cells are shared between DIscBIO and Granatum","\n")) Granatum<-sc colnames(Gexpression)<-colnames(sc@fdata) Granatum@fdata<-Gexpression Granatum@kmeans$kpart<-Gclusters[,3] names(Granatum@kmeans$kpart)<-Gclusters[,1] withr::with_options(repr.plot.width=12, repr.plot.height=25) plotSilhouette(Granatum,K=4) withr::with_options(repr.plot.width=10, repr.plot.height=12) Jaccard(Granatum,Clustering="K-means", K=4, plot = TRUE) # Jaccard f<-c(0.611, 0.273, 0.305, 0.625) cat(paste0("The mean Jaccard of all the clusters is ", round(mean(f), digits=1),"\n")) #cdiff<-DEGanalysis(sc,Clustering="K-means",K=4,fdr=0.05,name="DIscBIO",export = TRUE,quiet=TRUE) ####### differential expression analysis between all clusters #### To show the result table #head(cdiff[[1]]) # The first component #head(cdiff[[2]]) # The second component #df1<-rbind(read.csv(file=paste0(cdiff[[2]][1,4]),head=TRUE,sep=","),read.csv(file=paste0(cdiff[[2]][1,6]),head=TRUE,sep=",")) df1<-rbind(read.csv(file="Up-regulated-DIscBIOCl4inCl1VSCl4.csv",head=TRUE,sep=","),read.csv(file="Low-regulated-DIscBIOCl4inCl1VSCl4.csv",head=TRUE,sep=",")) genes1vs4<-df1[,4] #df2<-rbind(read.csv(file=paste0(cdiff[[2]][2,4]),head=TRUE,sep=","),read.csv(file=paste0(cdiff[[2]][2,6]),head=TRUE,sep=",")) df2<-rbind(read.csv(file="Up-regulated-DIscBIOCl3inCl1VSCl3.csv",head=TRUE,sep=","),read.csv(file="Low-regulated-DIscBIOCl3inCl1VSCl3.csv",head=TRUE,sep=",")) genes1vs3<-df2[,4] #df3<-rbind(read.csv(file=paste0(cdiff[[2]][3,4]),head=TRUE,sep=","),read.csv(file=paste0(cdiff[[2]][3,6]),head=TRUE,sep=",")) df3<-rbind(read.csv(file="Up-regulated-DIscBIOCl2inCl1VSCl2.csv",head=TRUE,sep=","),read.csv(file="Low-regulated-DIscBIOCl2inCl1VSCl2.csv",head=TRUE,sep=",")) genes1vs2<-df3[,4] #df4<-rbind(read.csv(file=paste0(cdiff[[2]][4,4]),head=TRUE,sep=","),read.csv(file=paste0(cdiff[[2]][4,6]),head=TRUE,sep=",")) df4<-rbind(read.csv(file="Up-regulated-DIscBIOCl3inCl4VSCl3.csv",head=TRUE,sep=","),read.csv(file="Low-regulated-DIscBIOCl3inCl4VSCl3.csv",head=TRUE,sep=",")) genes3vs4<-df4[,4] #df5<-rbind(read.csv(file=paste0(cdiff[[2]][5,4]),head=TRUE,sep=","),read.csv(file=paste0(cdiff[[2]][5,6]),head=TRUE,sep=",")) df5<-rbind(read.csv(file="Up-regulated-DIscBIOCl4inCl2VSCl4.csv",head=TRUE,sep=","),read.csv(file="Low-regulated-DIscBIOCl4inCl2VSCl4.csv",head=TRUE,sep=",")) genes2vs4<-df5[,4] #df6<-rbind(read.csv(file=paste0(cdiff[[2]][6,4]),head=TRUE,sep=","),read.csv(file=paste0(cdiff[[2]][6,6]),head=TRUE,sep=",")) df6<-rbind(read.csv(file="Up-regulated-DIscBIOCl2inCl3VSCl2.csv",head=TRUE,sep=","),read.csv(file="Low-regulated-DIscBIOCl2inCl3VSCl2.csv",head=TRUE,sep=",")) genes3vs2<-df6[,4] ################# [CL1 vs CL2 DIscBIO] vs [CL2 vs CL3 Granatum] dData<-genes1vs2 cat(paste0("There are ", length(dData), " DEGs detected by DIscBIO in CL1 vs CL2","\n")) target=G_DEGs$`2 vs. 3` S1<-subset(target,target[,6]>1.96) targetGeneList<-S1[,2] cat(paste0("There are ", length(targetGeneList), " DEGs detected by Granatum in CL2 vs CL3","\n")) gData<-targetGeneList idx_genes <- is.element(dData,gData) SharedGenes<-dData[idx_genes] cat(paste0("There are ", length(SharedGenes), " DEGs shared between DIscBIO and Granatum","\n")) df3[idx_genes,] ################# [CL1 vs CL3 DIscBIO] vs [CL2 vs CL4 Granatum] dData<-genes1vs3 cat(paste0("There are ", length(dData), " DEGs detected by DIscBIO in CL1 vs CL3","\n")) target=G_DEGs$`2 vs. 4` S1<-subset(target,target[,6]>1.96) targetGeneList<-S1[,2] cat(paste0("There are ", length(targetGeneList), " DEGs detected by Granatum in CL2 vs CL4","\n")) gData<-targetGeneList idx_genes <- is.element(dData,gData) SharedGenes<-dData[idx_genes] cat(paste0("There are ", length(SharedGenes), " DEGs shared between DIscBIO and Granatum","\n")) df2[idx_genes,] ################# [CL1 vs CL4 DIscBIO] vs [CL1 vs CL2 Granatum] dData<-genes1vs4 cat(paste0("There are ", length(dData), " DEGs detected by DIscBIO in CL1 vs CL4","\n")) target=G_DEGs$`1 vs. 2` S1<-subset(target,target[,6]>1.96) targetGeneList<-S1[,2] cat(paste0("There are ", length(targetGeneList), " DEGs detected by Granatum in CL1 vs CL2","\n")) gData<-targetGeneList idx_genes <- is.element(dData,gData) SharedGenes<-dData[idx_genes] cat(paste0("There are ", length(SharedGenes), " DEGs shared between DIscBIO and Granatum","\n")) df1[idx_genes,] ################# [CL2 vs CL3 DIscBIO] vs [CL3 vs CL4 Granatum] dData<-genes3vs2 cat(paste0("There are ", length(dData), " DEGs detected by DIscBIO in CL2 vs CL3","\n")) target=G_DEGs$`3 vs. 4` S1<-subset(target,target[,6]>1.96) targetGeneList<-S1[,2] cat(paste0("There are ", length(targetGeneList), " DEGs detected by Granatum in CL3 vs CL4","\n")) gData<-targetGeneList idx_genes <- is.element(dData,gData) SharedGenes<-dData[idx_genes] cat(paste0("There are ", length(SharedGenes), " DEGs shared between DIscBIO and Granatum","\n")) df6[idx_genes,] ################# [CL2 vs CL4 DIscBIO] vs [CL1 vs CL3 Granatum] dData<-genes2vs4 cat(paste0("There are ", length(dData), " DEGs detected by DIscBIO in CL2 vs CL4","\n")) target=G_DEGs$`1 vs. 3` S1<-subset(target,target[,6]>1.96) targetGeneList<-S1[,2] length(targetGeneList) cat(paste0("There are ", length(targetGeneList), " DEGs detected by Granatum in CL1 vs CL3","\n")) gData<-targetGeneList idx_genes <- is.element(dData,gData) SharedGenes<-dData[idx_genes] cat(paste0("There are ", length(SharedGenes), " DEGs shared between DIscBIO and Granatum","\n")) df5[idx_genes,] ################# [CL3 vs CL4 DIscBIO] vs [CL1 vs CL4 Granatum] dData<-genes3vs4 length(dData) cat(paste0("There are ", length(dData), " DEGs detected by DIscBIO in CL3 vs CL4","\n")) target=G_DEGs$`1 vs. 4` S1<-subset(target,target[,6]>1.96) targetGeneList<-S1[,2] length(targetGeneList) gData<-targetGeneList idx_genes <- is.element(dData,gData) SharedGenes<-dData[idx_genes] cat(paste0("There are ", length(SharedGenes), " DEGs shared between DIscBIO and Granatum","\n")) df4[idx_genes,] load("tSNE.Rdata") sc@tsne<-tSNE #sc<- comptSNE(sc,rseed=15555,max_iter=500, epoch=100,quiet = T) # to perform the computation of a t-SNE map plottSNE(sc) set.seed(1000) #TS<-tsne::tsne(t(Gexpression),max_iter=500, epoch=100) load("TS.Rdata") Granatum@tsne<-as.data.frame(TS) plottSNE(Granatum) sc<-pseudoTimeOrdering(sc,quiet = TRUE, export = FALSE) plotOrderTsne(sc)