library(DIscBIO) library(partykit) library(enrichR) FileName<-"valuesG1ms" # Name of the dataset CSV=TRUE # If the dataset has ".csv", the user shoud set CSV to TRUE #CSV=FALSE # If the dataset has ".rda", the user shoud set CSV to FALSE if (CSV==TRUE){ DataSet <- read.csv(file = paste0(FileName,".csv"), sep = ";",header=T) rownames(DataSet)<-DataSet[,1] DataSet<-DataSet[,-1] } else{ load(paste0(FileName,".rda")) DataSet<-get(FileName) } cat(paste0("The ", FileName," contains:","\n","Genes: ",length(DataSet[,1]),"\n","cells: ",length(DataSet[1,]),"\n")) sc<- DISCBIO(DataSet) # The DISCBIO class is the central object storing all information generated throughout the pipeline sc<-NoiseFiltering(sc,percentile=0.8, CV=0.3) #### Normalizing the reads without any further gene filtering sc<-Normalizedata(sc, mintotal=1000, minexpr=0, minnumber=0, maxexpr=Inf, downsample=FALSE, dsn=1, rseed=17000) sc<-FinalPreprocessing(sc,GeneFlitering="NoiseF",export = TRUE) ### The GeneFiltering can be either "NoiseF" or"ExpF" sc <- Exprmclust(sc,K =3,reduce = TRUE,quiet = TRUE) PlotmclustMB(sc) PCAplotSymbols(sc) # Plotting the model-based clusters in tSNE maps sc<- comptSNE(sc,rseed=15555,quiet = F) # to perform the computation of a t-SNE map plottSNE(sc) # Silhouette of Model-based clusters withr::with_par(mar=c(6,2,4,2)) plotSilhouette(sc,K=3) Jaccard(sc,Clustering="MB", K=3, plot = TRUE) # Jaccard of Model based clusters outlg<-round(length(sc@fdata[,1]) * 0.05) # The cell will be considered as an outlier if it has a minimum of 0.5% of the number of filtered genes as outlier genes. Outliers<- FindOutliers(sc, K=3, outminc=5,outlg=outlg,probthr=.5*1e-3,thr=2**-(1:40),outdistquant=.75, plot = TRUE, quiet = T) RemovingOutliers=FALSE # RemovingOutliers=TRUE # Removing the defined outlier cells based on K-means Clustering if(RemovingOutliers==TRUE){ names(Outliers)=NULL Outliers DataSet=DataSet[-Outliers] dim(DataSet) colnames(DataSet) cat("Outlier cells were removed, now you need to start from the beginning") } sc<-pseudoTimeOrdering(sc,quiet = FALSE, export = FALSE) PlotMBpca(sc,type="order") plotOrderTsne(sc) clustheatmap(sc,clustering_method = "model-based") g='ENSG00000210082' #### Plotting the expression of MT-RNR2 plotExptSNE(sc,g) g <- "ENSG00000210082" #### Plotting the expression of MT-RNR2 PlotMBpca(sc,g,type="exp") g <- "ENSG00000176890" #### Plotting the expression of TYMS PlotMBpca(sc,g,type="exp") ####### differential expression analysis between cluster 2 and cluster 3 of the Model-Based clustering using FDR of 0.05 MBcdiff<-DEGanalysis2clust(sc,Clustering="MB",K=3,fdr=0.05,name="Name",First="CL2",Second="CL3",export = TRUE,quiet=TRUE) #### To show the result table head(MBcdiff[[1]]) # The first component head(MBcdiff[[2]]) # The second component MBcdiff<-DEGanalysis(sc,Clustering="MB",K=3,fdr=0.05,name="all_clusters",export = TRUE,quiet=TRUE) ####### differential expression analysis between all clusters #### To show the result table head(MBcdiff[[1]]) # The first component head(MBcdiff[[2]]) # The second component MBcdiffBinomial<-ClustDiffGenes(sc,K=3,fdr=.01,export=TRUE, quiet=T) ########## Binomial differential expression analysis #### To show the result table head(MBcdiffBinomial[[1]]) # The first component head(MBcdiffBinomial[[2]]) # The second component name<-MBcdiffBinomial[[2]][2,4] ############ Selecting the "Up-DEG-cluster2.csv " from the DEGs' binomial table ############## U<-read.csv(file=paste0(name),head=TRUE,sep=",") Vplot<-VolcanoPlot(U,value=0.0001,name=name,FS=0.8,fc=0.75) sigDEG<-MBcdiff[[1]] # DEGs gene list from SANseq #sigDEG<-MBcdiffBinomial[[1]] # DEGs gene list from Binomial analysis First="CL2" Second="CL3" DATAforDT<-ClassVectoringDT(sc,Clustering="MB",K=3,First=First,Second=Second,sigDEG) j48dt<-J48DT(DATAforDT) summary(j48dt) j48dt<-J48DTeval(DATAforDT,num.folds=10,First=First,Second=Second) rpartDT<-RpartDT(DATAforDT) rpartEVAL<-RpartEVAL(DATAforDT,num.folds=10,First=First,Second=Second) DEGs="All DEGs" FileName=paste0(DEGs) #data<-MBcdiffBinomial[[1]] [,2] # DEGs gene list from Binomial analysis data<-MBcdiff[[1]] [,2] # From the table of the differential expression analysis between all pairs of clusters ppi<-PPI(data,FileName) networking<-NetAnalysis(ppi) networking ##### In case the Examine response components = 200 and an error "linkmat[i, ]" appeared, that means there are no PPI. # Plotting the network of the top 25 DEGs DATA<-data[1:25] network<-Networking(DATA,FileName,plot_width = 25, plot_height = 10) DEGs="Down-Reg-CL2" FileName=paste0(DEGs) ## Networking analysis of Down-regulated genes in cluster2 name<-MBcdiff[[2]][4,4] # From the table of the differential expression analysis between all pairs of clusters U<-read.csv(file=paste0(name),head=TRUE,sep=",") data<- U[,3] # Selecting the gene names ppi<-PPI(data,FileName) networking<-NetAnalysis(ppi) networking ##### In case the Examine response components = 200 and an error "linkmat[i, ]" appeared, that means there are no PPI. # Plotting the network network<-Networking(data,FileName,plot_width = 25, plot_height = 25) dbs <- listEnrichrDbs() head(dbs) #print(dbs) ############ Up-regulated genes in cluster 2 ############## #DEGs=MBcdiffBinomial[[2]][2,4] # Up-regulated genes in cluster 2 (from the Binomial analysis) DEGs=MBcdiff[[2]][2,6] # UP-regulated genes in cluster 2 (from SAMseq) data<-read.csv(file=paste0(DEGs),head=TRUE,sep=",") data<-as.character(data[,3]) dbs <- c("KEGG_2019_Human","GO_Biological_Process_2018") enriched <- enrichr(data, dbs) KEGG_2019_Human<-enriched[[1]][,c(1,2,3,9)] GO_Biological_Process_2018<-enriched[[2]][,c(1,2,3,9)] GEA<-rbind(KEGG_2019_Human,GO_Biological_Process_2018) GEA ############ Up-regulated genes in cluster 2 ############## DEGs=MBcdiffBinomial[[2]][2,4] # Up-regulated genes in cluster 2 (from the Binomial analysis) data<-read.csv(file=paste0(DEGs),head=TRUE,sep=",") data<-as.character(data[,3]) dbs <- c("KEGG_2019_Human","GO_Biological_Process_2018") enriched <- enrichr(data, dbs) KEGG_2019_Human<-enriched[[1]][,c(1,2,3,9)] GO_Biological_Process_2018<-enriched[[2]][,c(1,2,3,9)] GEA<-rbind(KEGG_2019_Human,GO_Biological_Process_2018) GEA