library(DIscBIO)

# ---- Load the expression dataset ----
FileName <- "CTCdataset"   # Base name of the dataset file (without extension)
# Set CSV to TRUE when the dataset is stored as "<FileName>.csv",
# or to FALSE when it is stored as "<FileName>.rda".
CSV <- FALSE

if (isTRUE(CSV)) {
    DataSet <- read.csv(file = paste0(FileName, ".csv"), sep = ",", header = TRUE)
    # The first column is used as the row names and then removed from the
    # table; drop = FALSE keeps a data frame even if one column remains.
    rownames(DataSet) <- DataSet[, 1]
    DataSet <- DataSet[, -1, drop = FALSE]
} else {
    # load() restores the saved objects into the workspace; the dataset is
    # then fetched by name, so the .rda file must contain an object called
    # exactly like FileName.
    load(paste0(FileName, ".rda"))
    DataSet <- get(FileName)
}
# Report the dimensions: rows are counted as genes, columns as cells.
cat(sprintf(
    "The %s contains:\nGenes: %d\ncells: %d\n",
    FileName, nrow(DataSet), ncol(DataSet)
))

# ---- Build the DISCBIO object, normalize and preprocess ----
FG <- DISCBIO(DataSet)
FG <- Normalizedata(
    FG,
    mintotal = 1000, minexpr = 0, minnumber = 0, maxexpr = Inf,
    downsample = FALSE, dsn = 1, rseed = 17000
)
# The argument is spelled "GeneFlitering" in this call; it should be set to "ExpF".
FG <- FinalPreprocessing(FG, GeneFlitering = "ExpF", export = TRUE)

# ---- Keep only the genes listed in GolgiFragGeneList.csv ----
# The file is read without a header; its first column holds the gene IDs.
GolgiFragGeneList <- read.csv(file = "GolgiFragGeneList.csv", sep = ",", header = FALSE)
Data <- FG@fdata
genes <- rownames(Data)
gene_list <- GolgiFragGeneList[, 1]
# Logical mask over the rows of Data: TRUE where the row name is in the list.
idx_genes <- genes %in% gene_list
# drop = FALSE keeps the two-dimensional shape even if few rows/columns remain.
OAdf <- Data[idx_genes, , drop = FALSE]
FG@fdata <- OAdf
dim(FG@fdata)
cat(paste0("A list of ", nrow(OAdf), " genes will be used for the clustering", "\n"))

# Load the saved "fg" object, which includes the data of the k-means
# clustering, and continue the analysis with it under the name FG.
# Note: this replaces the FG object built earlier in the script.
load("fg.RData")
FG <- fg
# Remove the objects that are no longer needed.
rm(DataSet, fg, Data, OAdf)

# Plot the gap statistics
plotGap(FG)

# ---- Plot the clusters ----
# Figure size for the following plots. withr::with_options() expects
# with_options(new, code), i.e. a list of options plus the code to run, so
# passing the option names directly as arguments fails. The options are set
# with options() instead so they remain in effect when the figure is rendered.
# NOTE(review): assumes a Jupyter/IRkernel session, where the repr.plot.*
# options control the displayed figure size -- confirm.
options(repr.plot.width = 12, repr.plot.height = 12)
plottSNE(FG)

# Pseudo-time ordering, then the t-SNE map of the resulting ordering
FG <- pseudoTimeOrdering(FG, quiet = TRUE, export = FALSE)
plotOrderTsne(FG)

# Figure sizes below are set with options() rather than
# withr::with_options(): that function expects with_options(new, code)
# (a list of options plus the code to run), so passing the option names
# directly as arguments fails.
# NOTE(review): assumes a Jupyter/IRkernel session, where the repr.plot.*
# options control the displayed figure size -- confirm.

# Silhouette plot (taller figure)
options(repr.plot.width = 12, repr.plot.height = 25)
plotSilhouette(FG, K = 4)       # K is the number of clusters

# Jaccard plot
options(repr.plot.width = 12, repr.plot.height = 12)
Jaccard(FG, Clustering = "K-means", K = 4, plot = TRUE)

# Plot the log expression of PTCRA
g <- "ENSG00000171611"
plotExptSNE(FG, g)

# Plot the log expression of KRT18
g <- "ENSG00000111057"
plotExptSNE(FG, g)