# DIscBIO workflow on the CTC dataset: load the expression table, normalize
# and filter it, restrict it to the genes listed in GolgiFragGeneList.csv,
# then load a precomputed clustering object and draw the diagnostic plots.

library(DIscBIO)

# ---- Input ----------------------------------------------------------------

FileName <- "CTCdataset" # Name of the dataset (file name without extension)

# Set CSV to TRUE if the dataset is stored as "<FileName>.csv",
# or to FALSE if it is stored as "<FileName>.rda".
# CSV <- TRUE
CSV <- FALSE

if (isTRUE(CSV)) {
  DataSet <- read.csv(
    file = paste0(FileName, ".csv"),
    sep = ",",
    header = TRUE
  )
  # The first column holds the row identifiers: promote it to row names
  # and drop it from the table.
  rownames(DataSet) <- DataSet[, 1]
  DataSet <- DataSet[, -1]
} else {
  # The .rda file is expected to define an object named after FileName.
  load(paste0(FileName, ".rda"))
  DataSet <- get(FileName)
}

cat(paste0(
  "The ", FileName, " contains:", "\n",
  "Genes: ", nrow(DataSet), "\n",
  "cells: ", ncol(DataSet), "\n"
))

# ---- Normalization and filtering ------------------------------------------

FG <- DISCBIO(DataSet)
FG <- Normalizedata(
  FG,
  mintotal = 1000,
  minexpr = 0,
  minnumber = 0,
  maxexpr = Inf,
  downsample = FALSE,
  dsn = 1,
  rseed = 17000
)

# The gene filtering mode should be set to "ExpF".
# NOTE(review): the argument is spelled `GeneFlitering` in this call and is
# kept unchanged; presumably it matches the DIscBIO signature -- confirm.
FG <- FinalPreprocessing(FG, GeneFlitering = "ExpF", export = TRUE)

# ---- Restrict to the Golgi fragmentation gene list ------------------------

GolgiFragGeneList <- read.csv(
  file = "GolgiFragGeneList.csv",
  sep = ",",
  header = FALSE
)

Data <- FG@fdata
genes <- rownames(Data)
gene_list <- GolgiFragGeneList[, 1]
idx_genes <- is.element(genes, gene_list)

# drop = FALSE keeps the table shape even if only one column remains.
OAdf <- Data[idx_genes, , drop = FALSE]
FG@fdata <- OAdf
dim(FG@fdata)
cat(paste0(
  "A list of ", nrow(OAdf), " genes will be used for the clustering", "\n"
))

# ---- Load the precomputed clustering --------------------------------------

# Loading the "fg" object that includes the data of the k-means clustering.
# This replaces the FG object built above.
load("fg.RData")
FG <- fg # Storing the data of fg in FG

# Removing the unneeded objects
rm(DataSet)
rm(fg)
rm(Data)
rm(OAdf)

# ---- Plots ----------------------------------------------------------------

plotGap(FG) # Plotting gap statistics

# Plot size is controlled through the repr.plot.* options. They are set with
# options() because the settings must still be in effect when each plot is
# drawn; the previous values are kept so they can be restored at the end.
old_opts <- options(repr.plot.width = 12, repr.plot.height = 12)

# Plotting the clusters
plottSNE(FG)

FG <- pseudoTimeOrdering(FG, quiet = TRUE, export = FALSE)
plotOrderTsne(FG)

# Silhouette plot (taller canvas)
options(repr.plot.width = 12, repr.plot.height = 25)
plotSilhouette(FG, K = 4) # K is the number of clusters

# Jaccard plot
options(repr.plot.width = 12, repr.plot.height = 12)
Jaccard(FG, Clustering = "K-means", K = 4, plot = TRUE)

g <- "ENSG00000171611" # Plotting the log expression of PTCRA
plotExptSNE(FG, g)

g <- "ENSG00000111057" # Plotting the log expression of KRT18
plotExptSNE(FG, g)

# Restore the plot-size options that were in effect before this script ran.
options(old_opts)