library(DIscBIO)
library(partykit)
library(enrichR)

FileName<-"valuesG1ms"       # Name of the dataset
CSV=TRUE                     # If the dataset has ".csv", the user shoud set CSV to TRUE
#CSV=FALSE                   # If the dataset has ".rda", the user shoud set CSV to FALSE

if (CSV==TRUE){
    DataSet <- read.csv(file = paste0(FileName,".csv"), sep = ";",header=T)
    rownames(DataSet)<-DataSet[,1]
    DataSet<-DataSet[,-1]
} else{
    load(paste0(FileName,".rda"))
    DataSet<-get(FileName)
}
cat(paste0("The ", FileName," contains:","\n","Genes: ",length(DataSet[,1]),"\n","cells: ",length(DataSet[1,]),"\n"))

sc<- DISCBIO(DataSet)       # The DISCBIO class is the central object storing all information generated throughout the pipeline 

sc<-NoiseFiltering(sc,percentile=0.8, CV=0.3)       

####  Normalizing the reads without any further gene filtering
sc<-Normalizedata(sc, mintotal=1000, minexpr=0, minnumber=0, maxexpr=Inf, downsample=FALSE, dsn=1, rseed=17000) 
sc<-FinalPreprocessing(sc,GeneFlitering="NoiseF",export = TRUE) ### The GeneFiltering can be either "NoiseF" or"ExpF"

sc <- Exprmclust(sc,K =3,reduce = TRUE,quiet = TRUE) 

PlotmclustMB(sc)
PCAplotSymbols(sc)

# Plotting the model-based clusters in tSNE maps
sc<- comptSNE(sc,rseed=15555,quiet = F)          # to perform the computation of a t-SNE map
plottSNE(sc)

# Silhouette of Model-based clusters
withr::with_par(mar=c(6,2,4,2))
plotSilhouette(sc,K=3)

Jaccard(sc,Clustering="MB", K=3, plot = TRUE)    # Jaccard of Model based clusters

outlg<-round(length(sc@fdata[,1]) * 0.05)     # The cell will be considered as an outlier if it has a minimum of 0.5% of the number of filtered genes as outlier genes. 
Outliers<- FindOutliers(sc, K=3, outminc=5,outlg=outlg,probthr=.5*1e-3,thr=2**-(1:40),outdistquant=.75, plot = TRUE, quiet = T)

RemovingOutliers=FALSE     
# RemovingOutliers=TRUE                    # Removing the defined outlier cells based on K-means Clustering

if(RemovingOutliers==TRUE){
    names(Outliers)=NULL
    Outliers
    DataSet=DataSet[-Outliers]
    dim(DataSet)
    colnames(DataSet)
    cat("Outlier cells were removed, now you need to start from the beginning")
}

sc<-pseudoTimeOrdering(sc,quiet = FALSE, export = FALSE)

PlotMBpca(sc,type="order")

plotOrderTsne(sc)

clustheatmap(sc,clustering_method = "model-based")            

g='ENSG00000210082'                #### Plotting the expression of  MT-RNR2
plotExptSNE(sc,g)

g <- "ENSG00000210082"       #### Plotting the expression of  MT-RNR2
PlotMBpca(sc,g,type="exp")

g <- "ENSG00000176890"       #### Plotting the expression of  TYMS
PlotMBpca(sc,g,type="exp")

####### differential expression analysis between cluster 2 and cluster 3 of the Model-Based clustering using FDR of 0.05
MBcdiff<-DEGanalysis2clust(sc,Clustering="MB",K=3,fdr=0.05,name="Name",First="CL2",Second="CL3",export = TRUE,quiet=TRUE)   
#### To show the result table
head(MBcdiff[[1]])                  # The first component 
head(MBcdiff[[2]])                  # The second component 

MBcdiff<-DEGanalysis(sc,Clustering="MB",K=3,fdr=0.05,name="all_clusters",export = TRUE,quiet=TRUE)   ####### differential expression analysis between all clusters
#### To show the result table
head(MBcdiff[[1]])                  # The first component 
head(MBcdiff[[2]])                  # The second component

MBcdiffBinomial<-ClustDiffGenes(sc,K=3,fdr=.01,export=TRUE, quiet=T)    ########## Binomial differential expression analysis
#### To show the result table
head(MBcdiffBinomial[[1]])                  # The first component 
head(MBcdiffBinomial[[2]])                  # The second component

name<-MBcdiffBinomial[[2]][2,4]    ############ Selecting the "Up-DEG-cluster2.csv " from the DEGs' binomial table  ##############
U<-read.csv(file=paste0(name),head=TRUE,sep=",")
Vplot<-VolcanoPlot(U,value=0.0001,name=name,FS=0.8,fc=0.75)

sigDEG<-MBcdiff[[1]]               # DEGs gene list from SANseq
#sigDEG<-MBcdiffBinomial[[1]]      # DEGs gene list from Binomial analysis
First="CL2"
Second="CL3"
DATAforDT<-ClassVectoringDT(sc,Clustering="MB",K=3,First=First,Second=Second,sigDEG)

j48dt<-J48DT(DATAforDT)
summary(j48dt) 

j48dt<-J48DTeval(DATAforDT,num.folds=10,First=First,Second=Second)

rpartDT<-RpartDT(DATAforDT)

rpartEVAL<-RpartEVAL(DATAforDT,num.folds=10,First=First,Second=Second)

DEGs="All DEGs"
FileName=paste0(DEGs)

#data<-MBcdiffBinomial[[1]] [,2]    # DEGs gene list from Binomial analysis
data<-MBcdiff[[1]] [,2]             # From the table of the differential expression analysis between all pairs of clusters

ppi<-PPI(data,FileName)

networking<-NetAnalysis(ppi)
networking                            ##### In case the Examine response components = 200 and an error "linkmat[i, ]" appeared, that means there are no PPI.

# Plotting the network of the top 25 DEGs
DATA<-data[1:25]
network<-Networking(DATA,FileName,plot_width = 25, plot_height = 10)

DEGs="Down-Reg-CL2"
FileName=paste0(DEGs)

## Networking analysis of Down-regulated genes in cluster2
name<-MBcdiff[[2]][4,4]            # From the table of the differential expression analysis between all pairs of clusters
U<-read.csv(file=paste0(name),head=TRUE,sep=",")
data<- U[,3]                       # Selecting the gene names
ppi<-PPI(data,FileName)

networking<-NetAnalysis(ppi)
networking                            ##### In case the Examine response components = 200 and an error "linkmat[i, ]" appeared, that means there are no PPI.

# Plotting the network 
network<-Networking(data,FileName,plot_width = 25, plot_height = 25)

dbs <- listEnrichrDbs()
head(dbs)
#print(dbs)

############ Up-regulated genes in cluster 2  ##############
#DEGs=MBcdiffBinomial[[2]][2,4]     # Up-regulated genes in cluster 2 (from the Binomial analysis)
DEGs=MBcdiff[[2]][2,6]              # UP-regulated genes in cluster 2 (from SAMseq)
data<-read.csv(file=paste0(DEGs),head=TRUE,sep=",")
data<-as.character(data[,3])

dbs <- c("KEGG_2019_Human","GO_Biological_Process_2018")
enriched <- enrichr(data, dbs)
KEGG_2019_Human<-enriched[[1]][,c(1,2,3,9)]
GO_Biological_Process_2018<-enriched[[2]][,c(1,2,3,9)]

GEA<-rbind(KEGG_2019_Human,GO_Biological_Process_2018)
GEA

############ Up-regulated genes in cluster 2  ##############
DEGs=MBcdiffBinomial[[2]][2,4]     # Up-regulated genes in cluster 2 (from the Binomial analysis)
data<-read.csv(file=paste0(DEGs),head=TRUE,sep=",")
data<-as.character(data[,3])

dbs <- c("KEGG_2019_Human","GO_Biological_Process_2018")
enriched <- enrichr(data, dbs)
KEGG_2019_Human<-enriched[[1]][,c(1,2,3,9)]
GO_Biological_Process_2018<-enriched[[2]][,c(1,2,3,9)]

GEA<-rbind(KEGG_2019_Human,GO_Biological_Process_2018)
GEA