hummingage <- function(path,infile,segments,DiscardOutliers,EstimateNullRate,token,plot_format) {
    ##                                                                                                                                          
    ## usage:                                                                                                                                   
    ## hummingage("/home/user/data/","myfile.csv",c(NaN),TRUE,"estimate","KJmpmGgQ","png,pdf")                                                  
    ## hummingage("/home/user/data/","myfile.csv",c(124),FALSE,"estimate","KJmpmGgQ","png")                                                     
    ## hummingage("/home/user/data/","myfile.csv",c(334,856,1223),TRUE,"estimate","KJmpmGgQ","pdf")                                             
    ## segments in cm                                                                                                                           

    ## path: (string): is the path to the infile                                                                                                
    ## infile: (string): is the data file according to a certain format                                                                         
    ## segments: (integer array): is an array indicating roughly the positions of the segments, use c(NaN) for no segments                      
    ## DiscardOutliers: (boolean): can be TRUE or FALSE. If true, a simple algorithm is applied to identify outliers                            
    ## EstimateNullRate: (string or integer): possible values are                                                                               
    ##                   "estimate": the x-axis offset is estimated from the data                                                               
    ##                   "null": the x-axis offset is set to 0                                                                                  
    ##                   negative or positive integer: x-axis offset is set to the inserted value, e.g. -42, 14, ... (in cm)                    
    ## token: (string): this is a random string, which can be choosen arbitrarily                                                               
    ## plot_format (string): this can be "png,pdf", "png" or "pdf" and defines what kinds of images are written to disk                         
    ##                       both, png and pdf or only png or pdf                                                                               
    ##  
    ...
    }

path <- "/home/jovyan/work/hummingage/"
infile <- "my_test_data_1.csv"
segments <- c(420)
DiscardOutliers <- TRUE
EstimateNullRate <- "estimate"
token <- "KHzvfrTgfX"
#plot_format <- "png,pdf"
plot_format <- "jupyter"                                                                                                                   
#plot_format <- ""

    ## path: (string): is the path to the infile                                                                                                
    ## infile: (string): is the data file according to a certain format                                                                         
    ## segments: (integer array): is an array indicating roughly the positions of the segments, use c(NaN) for no segments                      
    ## DiscardOutliers: (boolean): can be TRUE or FALSE. If true, a simple algorithm is applied to identify outliers                            
    ## EstimateNullRate: (string or integer): possible values are                                                                               
    ##                   "estimate": the x-axis offset is estimated from the data                                                               
    ##                   "null": the x-axis offset is set to 0                                                                                  
    ##                   negative or positive integer: x-axis offset is set to the inserted value, e.g. -42, 14, ... (in cm)                    
    ## token: (string): this is a random string, which can be choosen arbitrarily                                                               
    ## plot_format (string): this can be "png,pdf", "png" or "pdf" and defines what kinds of images are written to disk                         
    ##                       both, png and pdf or only png or pdf                                                                               
    ##  

## full file-path                                                                                                                                                                        
    xfile <- paste(path,infile,sep="");

    ## transform plot_format to array                                                                                                                                                        
    plot_format_tmp <- strsplit(plot_format,",")
    plot_format <- plot_format_tmp[[1]]


    ## load the infile and save as ageDataOrig                                                                                                                                               
    ageDataOrig <- read.table(xfile, header = TRUE,sep=",")

    ## if no segment will be used set it to empty                                                                                                                                            
    if (is.nan(segments)){
        segments = c()
    }

    ## calculate length and estimate rates                                                                                                                                                   
    lengthData <- length(ageDataOrig$depth)
    diffDepths <- diff(ageDataOrig$depth)
    accRatesOrig <- diff(ageDataOrig$age)/diffDepths


##--------------------------------------------------------------------------------------------##                                                                                         
    ## this block is only executed if DiscardOutliers == TRUE                                                                                                                                
    ## identify negative rates, i.e. outliers                                                                                                                                                
    ## the difficulty is to identify the outlier                                                                                                                                             
    ## i.e. which age is the outlier, because the rate is calculated from two ages                                                                                                           
    ## thus we take the ages left and right to the two ages which make the negative rate                                                                                                     
    ## and then we calculate which age is further away from the mean of the two left                                                                                                         
    ## and rights                                                                                                                                                                            
    if (DiscardOutliers==TRUE){
        ## find negative rates                                                                                                                                                               
        negRatesIndex <- which(accRatesOrig < 0)
    } else {
        ## set this to empty                                                                                                                                                                 
        negRatesIndex <- integer(0)
    }
    ## make a copy of the indices of the negative rates                                                                                                                                      
    negRatesIndexCopy <- negRatesIndex

    ## go through the negative rates and decide which age is the outlier, left or right                                                                                                      
    m <- 1
    for (i in negRatesIndexCopy) {
        ## skip the first                                                                                                                                                                    
        if (i==1){
            next
        }
        ## calculate the mean between the left and right age next to the two                                                                                                                 
        ## ages, which where used to calculate the rate                                                                                                                                      
        TheMean <- (ageDataOrig$age[(i-1)]+ageDataOrig$age[(i+2)])/2
        ## distance of the left age to the mean                                                                                                                                              
        dist1 <- abs(ageDataOrig$age[(i)]-TheMean)
        ## distance of the right age to the mean                                                                                                                                             
        dist2 <- abs(ageDataOrig$age[(i+1)]-TheMean)
        ## if the right is further away then this is the outlier                                                                                                                             
        ## so increment the index                                                                                                                                                            
        ## otherwise it's the left and leave the index                                                                                                                                       
        if (dist2>dist1){
            negRatesIndex[m] <- negRatesIndex[m]+1
        }
        m <- m+1
    }
    ##--------------------------------------------------------------------------------------------##  

## remove extreme rates and exclude the first                                                                                                                                            
    ## because the first must be estimated later below                                                                                                                                       
    ## remove outliers from original data                                                                                                                                                    
    ## or copy data as they are, if there are no outliers                                                                                                                                    
    LnegRatesIndex <- length(negRatesIndex)
    if (LnegRatesIndex>0){
        ## remove data                                                                                                                                                                       
        ageData <- ageDataOrig[-negRatesIndex,]
    } else {
        ## leave data as they are                                                                                                                                                            
        ageData <- ageDataOrig
    }

    ## now, outliers have been removed calculate again length, rates etc.                                                                                                                    
    lengthData <- length(ageData$depth)
    diffDepths <- diff(ageData$depth)
    accRates <- diff(ageData$age)/diffDepths
    sigmaRates <- sqrt((ageData$error[2:lengthData]^2+ageData$error[1:(lengthData-1)]^2))/diffDepths


 ## convert segments from depth to point number                                                                                                                                           
    ## find the position, i.e. between the two closest points                                                                                                                                
    Lsegments <- length(segments)
    if (Lsegments == 0) {
        segPos = c()
    } else {
        segPos <- rep(0,Lsegments)
        for (i in seq(1,Lsegments)){
            ## difference between depth variable and a sequence position (in cm)                                                                                                             
            Xdiff <- ageData$depth-segments[i]
            ## find the indices smaller than the segment position                                                                                                                            
            a <- which(Xdiff<0)
            ## set the position at the last negative                                                                                                                                         
            segPos[i] <- max(a)
        }
    }

    ## copy of segPos                                                                                                                                                                        
    segPosPlot <- segPos
    ## add left and right boundaries to segPos                                                                                                                                               
    segPos <- c(0,segPos,lengthData)
    segPosLength <- length(segPos)


##--------------------------------------------------------------------------------------------##                                                                                         
    ##  estimate the x-axis crossing to estimate the first rate                                                                                                                              
    ##  if we would take the origin 0,0 than the first rate will be wrong                                                                                                                    
    ##  because there is an offset                                                                                                                                                           
    ##                                                                                                                                                                                       
    ##  so, we fit the first segment and estimate the x-crossing                                                                                                                             
    ##  cut out first segment                                                                                                                                                                
    xcrossing <- 0
    if (EstimateNullRate == "estimate"){
        N <- 3
        Lu <- length(1:N)
        u <- ageData$age[1:N]
        X <- replicate(2,rep(1,Lu))
        X[,2] <- ageData$depth[1:N]
        ##  print(X)                                                                                                                                                                         
        beta <- solve(t(X) %*% X) %*% t(X) %*% u
        ##  print(beta)                                                                                                                                                                      
        ##  only if slope is positive                                                                                                                                                        
        if (beta[2]>0){
            xcrossing <- beta[1]/beta[2]*(-1)
            ##  print(xcrossing)                                                                                                                                                             
        } else {
            xcrossing <- 0
        }
    }
    if (EstimateNullRate == "null"){
        xcrossing <- 0
    }
    if (is.numeric(EstimateNullRate) == TRUE){
        ##  then this is xcrossing                                                                                                                                                           
        xcrossing <- EstimateNullRate
    }
    NullDiff <- ageData$depth[1]-xcrossing
    NullRate <- ageData$age[1]/NullDiff
    NullError <- ageData$error[1]/NullDiff
    ##--------------------------------------------------------------------------------------------##    

## now we have the first rate and add it to the rates                                                                                                                                    
    accRates <- c(NullRate,accRates)
    ## print(accRates)                                                                                                                                                                       
    ## add also a std for the first                                                                                                                                                          
    sigmaRates <- c(NullError,sigmaRates)

    ## initialise arrays for the fits                                                                                                                                                        
    fittedRates <- rep(0,lengthData)
    fittedError <- rep(0,lengthData)


##--------------------------------------------------------------------------------------------##                                                                                         
    ## fitting the segments                                                                                                                                                                  
    for (i in seq(2,segPosLength)) {
        ##  least square                                                                                                                                                                     
        Xsegment <- (segPos[i-1]+1):segPos[i]
        Lsegment <- length(Xsegment)
        ##  create X matrix                                                                                                                                                                  
        X <- replicate(2,rep(1,Lsegment))
        X[,2] <- ageData$depth[Xsegment]
        ##  create error matrix                                                                                                                                                              
        g <- solve(t(X) %*% X)
        ##  print(g)                                                                                                                                                                         
        beta <- g %*% t(X) %*% accRates[Xsegment]
        ##  print(beta)                                                                                                                                                                      
        fittedRates[Xsegment] <- X %*% beta
        ##  res                                                                                                                                                                              
        fittedError[Xsegment] <- sqrt(1/(Lsegment-1) * sum((fittedRates[Xsegment]-accRates[Xsegment])^2))
        ##  print(fittedError[Xsegment])                                                                                                                                                     
    }
    ##--------------------------------------------------------------------------------------------##        

##--------------------------------------------------------------------------------------------##                                                                                         
    ##  BIC                                                                                                                                                                                  
    ##  BIC for Gaussian approx. is BIC=N*ln(epsilon^2)+k*ln(N)                                                                                                                              
    ##  for fitting a linear regression one need k=q+2 parameters to be estimated                                                                                                            
    ##  one for the intercept, q for the slopes and one for the rmse                                                                                                                         
    ##  thus in the case of fitting the mean I need k=2 (intercept, rmse)                                                                                                                    
    ##  BIC[segments] <- N * log(rmse^2) + 2*segments * log(N)                                                                                                                               
    mse <- 1/lengthData * sum((fittedRates-accRates)^2)
    BIC <- lengthData * log(mse) + (2*(segPosLength-1)+1) * log(lengthData)
    ## (segPosLength-1) because above I have added 0,segPos,N to segPos                                                                                                                      
    ## thus (segPosLength-1) is exactly the number of segments                                                                                                                               
    ## write out the BIC                                                                                                                                                                     
    write(BIC,file=paste(path,token,"_BIC.txt",sep=""),ncolumns=1)
    ##--------------------------------------------------------------------------------------------##     

##-------------------------------------------------------------------------------------------##                                                                                         
    ## apply the Bayesian approach, i.e. for the Gaussian assumption the weighted mean                                                                                                       
    BayesFittedRates <- (fittedRates * sigmaRates^2 + accRates * fittedError^2)/(fittedError^2 + sigmaRates^2)
    ## disable Bayes                                                                                                                                                                         
    ##BayesFittedRates <- fittedRates                                                                                                                                                        

    ## and the weighted std                                                                                                                                                                  
    BayesSigmaRates <- sqrt((fittedError^2 * sigmaRates^2)/(fittedError^2 + sigmaRates^2))
    ## disable Bayes                                                                                                                                                                         
    ##BayesSigmaRates <- fittedError                                                                                                                                                         


    ## calculate the ages from the rates                                                                                                                                                     
    ageFit <- cumsum(BayesFittedRates * c(NullDiff,diffDepths))
    ##  print("sum1")                                                                                                                                                                        
    ##  print(sum((ageFit-ageData$age)^2))                                                                                                                                                   

    ##  and the errors                                                                                                                                                                       
    ageFitError <- rep(0,lengthData)
    ageFitError[1] <- BayesSigmaRates[1]*NullDiff
    for (i in seq(1,(lengthData-1))) {
        ageFitError[i+1] <- sqrt(ageFitError[i]^2+(BayesSigmaRates[i+1]*diffDepths[i])^2)
    }
    ##--------------------------------------------------------------------------------------------##     

## plotting the intermediate result of fitted segments                                                                                                                                   
    for (k in plot_format){    
        if (k == "jupyter"){
            Ascale <- 1 #4
            Bscale <- 1 #6
            Cscale <- 2 #8
            Dscale <- 4 #12    
        } else {
            Ascale <- 4
            Bscale <- 6
            Cscale <- 8
            Dscale <- 12    

            par(mar=c(14,14,4,2),mgp=c(10,4,0),bg=rgb(248/255,250/255,252/255))    
            if (k == "png"){
                png(filename=paste(path,token,"_intermediate.png",sep=""),width=2000,height=1400,pointsize=10)#,width=11,height=8)                                                               
            }
            if (k == "pdf"){
                pdf(file=paste(path,token,"_intermediate.pdf",sep=""),width=40,height=28)#,width=11,height=8)                                                                                    
            }
        }
        

        ymin <- min(c(accRates-sigmaRates),na.rm=TRUE)
        ymax <- max(c(accRates+sigmaRates),na.rm=TRUE)
        xmin <- 0 ##min(ageDataOrig$depth)                                                                                                                                                   
        xmax <- max(ageDataOrig$depth)
        plot(ageData$depth,accRates,ylim=c(ymin,ymax),xlim=c(xmin,xmax),xlab="Depth [cm]",ylab="Rates [ka/cm]",cex.lab=Ascale,cex.axis=Ascale)
        if (Lsegments > 0) {
            for (i in seq(1,length(segPosPlot))) {
                par(new=TRUE)
                segPosDepth <- (ageData$depth[segPosPlot[i]]+ageData$depth[segPosPlot[i]+1])/2
                plot(c(segPosDepth,segPosDepth),c(ymin,ymax),ylim=c(ymin,ymax),xlim=c(xmin,xmax),col=rgb(200/255,200/255,200/255),type="l",lwd=Dscale,lty=2,xlab="",ylab="",xaxt="n",yaxt="n",cex=Cscale)
            }
        }
        par(new=TRUE)
        polygon(c(ageData$depth, rev(ageData$depth)), c(fittedRates-fittedError,
                                                        rev(fittedRates+fittedError)), col=rgb(0.8,0.8,0.8),border=NA)
        par(new=TRUE)
        plot(ageData$depth,accRates,ylim=c(ymin,ymax),xlim=c(xmin,xmax),col=rgb(42/255,190/255,230/255),pch=19,xlab="",ylab="",xaxt="n",yaxt="n",cex=Cscale)
        par(new=TRUE)
        plot(ageData$depth,fittedRates,ylim=c(ymin,ymax),xlim=c(xmin,xmax),xlab="",pch=19,ylab="",xaxt="n",yaxt="n",col="red",cex=Ascale)
        par(new=TRUE)
        plot(ageData$depth,fittedRates,ylim=c(ymin,ymax),xlim=c(xmin,xmax),lwd=Cscale,xlab="",ylab="",xaxt="n",yaxt="n",type="l",col="red",cex=Cscale)

        arrows(ageData$depth, accRates-sigmaRates, ageData$depth,
               accRates+sigmaRates, length=0.2, angle=90, code=3,col="black",lwd=Bscale,cex=Cscale)

        legend(xmax, ymax, xjust=1,legend=c("data","fit","sequences"),
               col=c(rgb(42/255,190/255,230/255),"red",rgb(200/255,200/255,200/255)), lwd=c(NaN,Cscale,Dscale), lty=c(1,1,2), pch=c(19,19,NaN), cex=c(Bscale), pt.cex=c(Cscale,Ascale,Cscale))
        if (k != "jupyter"){
            dev.off()
        }
    }
    ##--------------------------------------------------------------------------------------------## 

## plotting rates                                                                                                                                                                        
    for (k in plot_format){
        if (k == "jupyter"){
            Ascale <- 1 #4                                                                                                                                                                   
            Bscale <- 1 #6                                                                                                                                                                   
            Cscale <- 2 #8                                                                                                                                                                   
            Dscale <- 4 #12                                                                                                                                                                  
        } else {
            Ascale <- 4
            Bscale <- 6
            Cscale <- 8
            Dscale <- 12

            par(mar=c(14,14,4,2),mgp=c(10,4,0),bg=rgb(248/255,250/255,252/255))
            if (k == "png"){
                png(filename=paste(path,token,"_rates.png",sep=""),width=2000,height=1400,pointsize=10)#,width=11,height=8)                                                                  
            }
            if (k == "pdf"){
                pdf(file=paste(path,token,"_rates.pdf",sep=""),width=40,height=28)#,width=11,height=8)                                                                                       
            }
        }

        ymin <- min(c(accRates-sigmaRates),na.rm=TRUE)
        ymax <- max(c(accRates+sigmaRates),na.rm=TRUE)

        xmin <- 0 ##min(ageDataOrig$depth)                                                                                                                                                   
        xmax <- max(ageDataOrig$depth)
        plot(ageData$depth,accRates,ylim=c(ymin,ymax),xlim=c(xmin,xmax),xlab="Depth [cm]",ylab="Rates [ka/cm]",cex.lab=Ascale,cex.axis=Ascale)
        if (Lsegments > 0) {
            for (i in seq(1,length(segPosPlot))) {
                par(new=TRUE)
                segPosDepth <- (ageData$depth[segPosPlot[i]]+ageData$depth[segPosPlot[i]+1])/2
                plot(c(segPosDepth,segPosDepth),c(ymin,ymax),ylim=c(ymin,ymax),xlim=c(xmin,xmax),col=rgb(200/255,200/255,200/255),type="l",lwd=Dscale,lty=2,xlab="",ylab="",xaxt="n",yaxt="n",cex=Cscale)
            }
        }
        par(new=TRUE)
        polygon(c(ageData$depth, rev(ageData$depth)), c(BayesFittedRates-BayesSigmaRates,
                                                        rev(BayesFittedRates+BayesSigmaRates)), col=rgb(0.8,0.8,0.8),border=NA)
        par(new=TRUE)
        plot(ageData$depth,accRates,ylim=c(ymin,ymax),xlim=c(xmin,xmax),col=rgb(42/255,190/255,230/255),pch=19,xlab="",ylab="",xaxt="n",yaxt="n",cex=Cscale)
        par(new=TRUE)
        plot(ageData$depth,BayesFittedRates,ylim=c(ymin,ymax),xlim=c(xmin,xmax),xlab="",pch=19,ylab="",xaxt="n",yaxt="n",col="red",cex=Ascale)
        par(new=TRUE)
        plot(ageData$depth,BayesFittedRates,ylim=c(ymin,ymax),xlim=c(xmin,xmax),lwd=Cscale,xlab="",ylab="",xaxt="n",yaxt="n",type="l",col="red",cex=Cscale)
        arrows(ageData$depth, accRates-sigmaRates, ageData$depth,
               accRates+sigmaRates, length=0.2, angle=90, code=3,col="black",lwd=Bscale,cex=Cscale)

        legend(xmax, ymax, xjust=1,legend=c("data","fit","sequences"),
               col=c(rgb(42/255,190/255,230/255),"red",rgb(200/255,200/255,200/255)), lwd=c(NaN,Cscale,Dscale), lty=c(1,1,2), pch=c(19,19,NaN), cex=c(Bscale), pt.cex=c(Cscale,Ascale,Cscale))

        if (k != "jupyter"){
            dev.off()
        }
    }
    ##--------------------------------------------------------------------------------------------##      

## plotting ages                                                                                                                                                                         
    for (k in plot_format){
        if (k == "jupyter"){
            Ascale <- 1 #4                                                                                                                                                                   
            Bscale <- 1 #6                                                                                                                                                                   
            Cscale <- 2 #8                                                                                                                                                                   
            Dscale <- 4 #12                                                                                                                                                                  
        } else {
            Ascale <- 4
            Bscale <- 6
            Cscale <- 8
            Dscale <- 12

            par(mar=c(14,14,4,2),mgp=c(10,4,0),bg=rgb(248/255,250/255,252/255))
            if (k == "png"){
                png(filename=paste(path,token,"_ages.png",sep=""),width=2000,height=1400,pointsize=10)#,width=11,height=8)                                                                   
            }
            if (k == "pdf"){
                pdf(file=paste(path,token,"_ages.pdf",sep=""),width=40,height=28)#,width=11,height=8)                                                                                        
            }
        }


        ymin <- min(c(ageData$age-ageData$error,ageDataOrig$age),na.rm=TRUE)
        ymax <- max(c(ageData$age+ageData$error,ageDataOrig$age,ageFit+ageFitError),na.rm=TRUE)
        xmin <- 0 ##min(ageDataOrig$depth)                                                                                                                                                   
        xmax <- max(ageDataOrig$depth)
        plot(ageData$depth,ageData$age,ylim=c(ymin,ymax),xlim=c(xmin,xmax),xlab="Depth [cm]",ylab="Age [ka]",cex.lab=Ascale,cex.axis=Ascale)
        if (Lsegments > 0) {
            for (i in seq(1,length(segPosPlot))) {
                par(new=TRUE)
                segPosDepth <- (ageData$depth[segPosPlot[i]]+ageData$depth[segPosPlot[i]+1])/2
                plot(c(segPosDepth,segPosDepth),c(ymin,ymax),ylim=c(ymin,ymax),xlim=c(xmin,xmax),col=rgb(200/255,200/255,200/255),type="l",lwd=Dscale,lty=2,xlab="",ylab="",xaxt="n",yaxt="n",cex=Cscale)
            }
        }
        par(new=TRUE)
        polygon(c(ageData$depth, rev(ageData$depth)), c(ageFit-ageFitError,
                                                        rev(ageFit+ageFitError)), col=rgb(0.8,0.8,0.8),border=NA,xlim=c(xmin,xmax),)
        par(new=TRUE)
        plot(ageData$depth,ageData$age,ylim=c(ymin,ymax),xlim=c(xmin,xmax),col=rgb(42/255,190/255,230/255),pch=19,xlab="",ylab="",xaxt="n",yaxt="n",cex=Cscale)
        par(new=TRUE)
        plot(ageDataOrig$depth[negRatesIndex],ageDataOrig$age[negRatesIndex],ylim=c(ymin,ymax),xlim=c(xmin,xmax),col=rgb(230/255,190/255,42/255),pch=19,xlab="",ylab="",xaxt="n",yaxt="n",cex=Cscale)
        par(new=TRUE)
        plot(ageData$depth,ageFit,ylim=c(ymin,ymax),xlim=c(xmin,xmax),xlab="",pch=19,ylab="",xaxt="n",yaxt="n",col="red",cex=Ascale)
        par(new=TRUE)
        plot(ageData$depth,ageFit,ylim=c(ymin,ymax),xlim=c(xmin,xmax),lwd=Cscale,xlab="",ylab="",xaxt="n",yaxt="n",type="l",col="red",cex=Cscale)

        arrows(ageDataOrig$depth, ageDataOrig$age-ageDataOrig$error, ageDataOrig$depth,
               ageDataOrig$age+ageDataOrig$error, length=0.2, angle=90, code=3,col="black",lwd=Bscale,cex=Cscale)

        legend(xmax, ymin, xjust=1,yjust=0,legend=c("data","outliers","fit","sequences"),
               col=c(rgb(42/255,190/255,230/255),rgb(230/255,190/255,42/255),"red",rgb(200/255,200/255,200/255)), lwd=c(NaN,NaN,Cscale,Dscale), lty=c(1,1,1,2), pch=c(19,19,19,NaN), cex=c(Bscale), pt.cex=c(Cscale,Cscale,Ascale,NaN))

        if (k != "jupyter"){
            dev.off()
        }
    }
    ##--------------------------------------------------------------------------------------------##