library(DIscBIO) library(partykit) library(enrichR) library(ggplot2) load("FGcdiffBinomial.RData") #### To show the result table head(FGcdiffBinomial[[1]]) # The first component head(FGcdiffBinomial[[2]]) # The second component name<-"UpRegulated-DEG-cluster3" ############ Selecting the DEGs' U<-read.csv(file="UpRegulated-DEG-cluster3.csv",head=TRUE,sep=",") Vplot<-VolcanoPlot(U,value=0.0001,name=name,FS=0.7,fc=0.75) ###################### Finding biomarker genes between cluster 3 and cluster 4 First="CL3" Second="CL4" load("FGDATAforDT.RData") # Preparing the dataset for the decision trees j48dt<-J48DT(FGDATAforDT) #J48 Decision Tree summary(j48dt) rm(j48dt) rpartDT<-RpartDT(FGDATAforDT) rm(rpartDT) ############ Selecting the DEGs' table ############## data<-read.csv(file="UpRegulated-DEG-cluster3.csv",head=TRUE,sep=",") data<-as.character(data[1:200,3]) ## Checking the first 200 DEGs ###### Getting the protein-protein interactions ############# ppi<-PPI(data) ############# Networking Analysis ################ networking<-NetAnalysis(ppi) networking ##### In case the Examine response components = 200 and an error "linkmat[i, ]" appeared, that means there are no PPI. ############ Plotting the Network ################## Data<-data[1:100] ## Plotting the first 100 DEGs network<-Networking(Data) ############ Selecting the DEGs' table ############## data<-read.csv(file="UpRegulated-DEG-cluster3.csv",head=TRUE,sep=",") # Up-regulated genes in cluster 3 (from the Binomial analysis) data<-as.character(data[,3]) ## Checking the first 200 DEGs dbs <- c("KEGG_2019_Human","GO_Biological_Process_2018") enriched <- enrichr(data, dbs) KEGG_2019_Human<-enriched[[1]][,c(1,2,3,9)] GO_Biological_Process_2018<-enriched[[2]][,c(1,2,3,9)] GEA<-rbind(KEGG_2019_Human,GO_Biological_Process_2018) GEA load("fg.RData") # Loading the "fg" object that has include the data of the k-means clustering FG<-fg FG<-pseudoTimeOrdering(FG,quiet = TRUE, export = FALSE) FG@kordering<-FG@kordering[order(-FG@kordering)] NewNames<-names(FG@kordering) OrderedDataSet<-FG@ndata for (i in 1:length(FG@kordering)){ OrderedDataSet[1:length(FG@ndata[,1]),i]<-FG@ndata[1:length(FG@ndata[,1]),FG@kordering[i]] } colnames(OrderedDataSet)<-NewNames COL=rainbow(20) ## Spearman correlation between TGFB1 and KRT18 KRT18<-as.numeric(OrderedDataSet["ENSG00000111057",]) TGFB1<-as.numeric(OrderedDataSet["ENSG00000105329",]) cor.test(TGFB1,KRT18, method = "spearman",exact=F) ## Spearman correlation between MMD and ZEB1 MMD<-as.numeric(OrderedDataSet["ENSG00000108960",]) ZEB1<-as.numeric(OrderedDataSet["ENSG00000148516",]) cor.test(MMD, ZEB1, method = "spearman",,exact=F) ################## Loading the EMT gene list S <- read.csv(file = "EMT_List.csv", sep = ",",header=F) ####################### Plotting the expression of each gene gene<-S[1,1] cells<-c(1:length(FG@ndata[1,])) expression=log(t(OrderedDataSet[gene,])) names(expression)=NULL ID<-c(rep(gene,1448)) NewDF1=data.frame(expression,cells,ID) rownames(NewDF1)=NULL colnames(NewDF1)=c("expression","cells","GeneID") ########################################################### to get the smooth line p=ggplot(NewDF1,aes(x=cells,y=expression),colour=col) + # Line for each gene geom_smooth(size = 2, se = F, color = "orange")+ labs(y="Gene Expression", x = "Cells' pseudotime order") g<-ggplot_build(p) par(mar = c(5, 4, 1, 3.6)) plot(g$data[[1]][,1],g$data[[1]][,2],col=COL[1], type="l", las=1,xlab="Pseudo-time-ordering",ylab="Log Gene Expression",ylim=c(-3,3),lwd = 3,xaxt = "n") ############################### to show cluster ID at the x axis part=FG@kmeans$kpart col=c() for (i in NewNames){ col=c(col,as.numeric(part[i])) } coll = c("black", "blue", "green", "red", "yellow", "gray") for (i in 1: length(part)){ points(i,-3.16,col = coll[col[i]],pch = 15, cex = 1.5) } genes<-S[,1] SYM<-S[,2] for ( i in 1:length(S[,1])){ gene<-genes[i] cells<-c(1:1448) expression=log(t(OrderedDataSet[gene,])) names(expression)=NULL ID<-c(rep(gene,1448)) NewDF3=data.frame(expression,cells,ID) rownames(NewDF3)=NULL colnames(NewDF3)=c("expression","cells","GeneID") p=ggplot(NewDF3,aes(x=cells,y=expression),colour=col) + # Line for each gene geom_smooth(size = 2, se = F, color = COL[i])+ labs(y="Gene Expression", x = "Cells' pseudotime order") ggg<-ggplot_build(p) ############################# to extract the xy lines(ggg$data[[1]][,1],ggg$data[[1]][,2],col=COL[i],lwd = 3) } add_legend <- function(...) { opar <- par(fig=c(0, 1, 0, 1), oma=c(0, 0, 0, 0), mar=c(0, 0, 0, 0), new=TRUE) on.exit(par(opar)) plot(0, 0, type='n', bty='n', xaxt='n', yaxt='n') legend(...) } add_legend("topright", legend=SYM, pch=19, col=COL[1:length(S[,1])], horiz=F, bty='n', cex=0.7) #### Plotting the heatmap load("fg.RData") # Loading the "fg" object that includes the data of the k-means clustering FG<-fg # Storing the data of fg in the FG rm(fg) # Removing the unneeded objects clustheatmap(FG)