install.packages(c("tseries","pROC")) options(repr.plot.width=4, repr.plot.height=4) cat(" Bayesian Cointegration Implementation of a Bayesian test for cointegration Written by Chris Bracegirdle (c) Chris Bracegirdle 2015. All rights reserved.") GenerateData <- function(T) { cointegrated <- runif(1) > 0.5 phi <- if(cointegrated) runif(1) * 2 - 1 else 1 std_eta <- exp(rnorm(1)) std_x <- exp(rnorm(1)) intercept <- exp(rnorm(1, sd=5)) slope <- exp(rnorm(1, mean=1, sd=5)) epsilon <- double(length=T) x <- double(length=T) y <- double(length=T) epsilon[1] <- rnorm(1, sd=std_eta) x[1] <- rnorm(1, sd=std_x) y[1] <- intercept + slope * x[1] + epsilon[1] for (t in 2:T){ epsilon[t] <- phi * epsilon[t-1] + rnorm(1, sd=std_eta) x[t] <- x[t-1] + rnorm(1, sd=std_x) y[t] <- intercept + slope * x[t] + epsilon[t] } return(list("cointegrated" = cointegrated, "x" = x, "y" = y)) } gendata = GenerateData(1000) par(cex.axis=0.7, cex.lab=0.7, cex.main=0.7, cex.sub=0.7) plot(gendata$x, type='l', col='blue', ylim=range(gendata$x, gendata$y), ylab='') par(new=T); plot(gendata$y, type='l', col='green', axes=F, ylab=''); par(new=F) gendata[names(gendata)=='cointegrated'] LinearRegression <- function(x,y) { slope <- cor(x, y) * (sd(y) / sd(x)) intercept <- mean(y) - (slope * mean(x)) std_eta = sd( y - intercept - slope * x) return(list("slope"=slope, "intercept"=intercept, "std_eta"=std_eta)) } LinearRegression(gendata$x,gendata$y) mylogsumexp <- function(a,b) { #LOGSUMEXP Compute log(sum(exp(a).*b)) valid for large a # example: mylogsumexp(c(-1000,-1001,-998),c(1,2,0.5)) amax <- max(Re(a)) if (amax == -Inf) { amax=0 } return(amax + log(as.complex(sum(exp(a-amax)*b)))) } CalcLogAreaLog <- function(logf,logF) { lncdf <- pnorm(c(1, -1), mean = Re(exp(logf)), sd = Re(exp(0.5*logF)), log.p = TRUE) logarea <- Re(mylogsumexp(lncdf, c(1, -1))-log(2)) return(logarea) } CalcMomentsLog <- function(logf,logF,logarea) { lnpdf <- dnorm(c(1,-1), mean = Re(exp(logf)), sd = Re(exp(0.5*logF)), log = TRUE) logmoment1 <- mylogsumexp(c(lnpdf[1]+logF-logarea, lnpdf[2]+logF-logarea, logf), c(-0.5, 0.5, 1)) logmoment2 <- mylogsumexp(c(logF+logf+lnpdf[1]-logarea, logF+logf+lnpdf[2]-logarea, logF+lnpdf[1]-logarea, logF+lnpdf[2]-logarea, 2*logf, logF), c(-0.5, 0.5, -0.5, -0.5, 1, 1)) return(list("moment1"=Re(exp(logmoment1)), "moment2"=Re(exp(logmoment2)))) } Filtering <- function(V,std_eta) { T <- length(V) # DIRECT METHOD logft <- log(as.complex(sum(V[2:T]*V[1:T-1])))-log(sum(V[1:T-1]^2)) logFt <- 2*log(std_eta) - log(sum(V[1:T-1]^2)) stopifnot(!is.nan(logft)) #logft must be real stopifnot(!is.nan(logFt)) #logFt must be real stopifnot(is.double(logFt)) #logFt must be real logarea <- CalcLogAreaLog(logft,logFt) loglik <- -0.5*log(sum(V[1:T-1]^2))-0.5*(T-2)*log(2*pi*std_eta^2)+logarea -(sum(V[2:T]^2)-sum(V[2:T]*V[1:T-1])^2/sum(V[1:T-1]^2))/(2*std_eta^2) # calculate moments moments <- CalcMomentsLog(logft,logFt,logarea) return(list("loglik"=loglik, "moment1"=moments$moment1, "moment2"=moments$moment2)) } EMUpdate <- function(x,y,moment1,moment2) { T <- length(x) xt <- x[2:T] xtm1 <- x[1:T-1] yt <- y[2:T] ytm1 <- y[1:T-1] # find the coefficients a <- 2 * (T-1) * moment1 - (T-1) * moment2 - (T-1) b <- moment1 * sum(xt+xtm1) - moment2 * sum(xtm1) - sum(xt) c <- moment2 * sum(ytm1) - moment1 * sum(yt + ytm1) + sum(yt) d <- 2 * moment1 * sum(xt * xtm1) - moment2 * sum(xtm1 ^ 2) - sum(xt ^ 2) e <- moment2 * sum(xtm1 * ytm1) - moment1 * sum(xtm1 * yt + xt * ytm1) + sum(xt * yt) # solve simultaneous equations slope <- ((a * e) - (c * b)) / ((b ^ 2) - (a * d)) intercept <- (-slope * d / b) - (e / b) # now find optimal sigma eps <- y - intercept - slope * x ept <- eps[2:T] eptm1 <- eps[1:T-1] std_eta <- sqrt( (sum(ept^2) - 2 * moment1 * sum( ept * eptm1) + moment2 * sum(eptm1 ^ 2)) / (T-1) ) stopifnot(std_eta>0) #Standard deviation must be positive stopifnot(is.double(std_eta)) #Standard deviation must be real return(list("slope"=slope,"intercept"=intercept,"std_eta"=std_eta)) } CointInference <- function(epsilon,std_eta,x,y) { filtering <- Filtering(epsilon,std_eta) update <- EMUpdate(x,y,filtering$moment1,filtering$moment2) #std_eta_with_old_regression <- sqrt( (sum(epsilon[1:]**2) \ # - 2 * sum(moment1 * epsilon[1:] * epsilon[:-1]) \ # + sum(moment2 * epsilon[:-1] ** 2)) / (x.size - 1) ) return(list("loglik"=filtering$loglik, "slope"=update$slope, "intercept"=update$intercept, "std_eta"=update$std_eta#,"std_eta_with_old_regression"=std_eta_with_old_regression #,"moment1"=filtering$moment1 )) } BayesianLearningTest <- function(x,y) { T <- length(x) ols <- LinearRegression(x,y) slope <- ols$slope intercept <- ols$intercept std_eta_coint <- ols$std_eta # cointegrated case - learn slope, intercept, std by ML logliks <- c(-Inf) for (i in 1:1000){ stopifnot(!is.nan(intercept)) #Intercept cannot be nan stopifnot(!is.nan(slope)) #Slope cannot be nan stopifnot(is.double(std_eta_coint)) #Standard deviation must be real stopifnot(std_eta_coint > 0) #Standard deviation must be greater then 0 inference <- CointInference(y-intercept-slope*x,std_eta_coint,x,y) slope <- inference$slope intercept <- inference$intercept std_eta_coint <- inference$std_eta if (inference$loglik-tail(logliks, n=1)<0.00001) { break } logliks <- c(logliks,inference$loglik) } # non-cointegrated case - use above slope, intercept, use ML std epsilon <- y-intercept-slope*x std_eta_p1 <- sqrt(mean((epsilon[2:T]-epsilon[1:T-1])^2)) loglik_p1 <- sum(dnorm(epsilon[2:T], mean = epsilon[1:T-1], sd = std_eta_p1, log = TRUE)) bayes_factor <- exp(loglik_p1 - inference$loglik) cointegrated <- inference$loglik > loglik_p1 return(loglik_p1 - inference$loglik) } gendata = GenerateData(1000) par(cex.axis=0.7, cex.lab=0.7, cex.main=0.7, cex.sub=0.7) plot(gendata$x, type='l', col='blue', ylim=range(gendata$x, gendata$y), ylab='') par(new=T); plot(gendata$y, type='l', col='green', axes=F, ylab=''); par(new=F) gendata[names(gendata)=='cointegrated'] BF <- BayesianLearningTest(gendata$x,gendata$y) test_result <- BF<0 if (test_result == gendata$cointegrated) { comparison <- "Congratulations! The result of the routine matches the ground truth" } else { comparison <- "Unfortunately the routine disagreed with the ground truth" } list("test_result"=test_result, "comparison"=comparison) require(tseries) ols <- LinearRegression(gendata$x,gendata$y) epsilon <- gendata$y-ols$intercept-ols$slope*gendata$x adf <- adf.test(epsilon, k=1) pvalue <- adf$p.value cointegrated_adf <- pvalue<0.05 cointegrated_adf T <- 20 experiments <- 5000 cointegratedActual <- logical(length=experiments) logBF <- double(length=experiments) pvalue <- double(length=experiments) for (expr in 1:experiments) { gendata <- GenerateData(T) cointegratedActual[expr] <- gendata$cointegrated #classical test ols <- LinearRegression(gendata$x,gendata$y) epsilon <- gendata$y-ols$intercept-ols$slope*gendata$x suppressWarnings(adf <- adf.test(epsilon, k=1)) pvalue[expr] <- adf$p.value #bayesian test logBF[expr] <- BayesianLearningTest(gendata$x,gendata$y) if (expr %% (experiments/20) == 0) { print(paste("Experiment",expr,"of",experiments)) } } require(pROC) roc_adf <- roc(cointegratedActual, -pvalue) roc_bayes <- roc(cointegratedActual, -logBF) par(cex.axis=0.7, cex.lab=0.7, cex.main=0.7, cex.sub=0.7) plot.roc(roc_adf, col='blue') plot.roc(roc_bayes, add=TRUE, col='green') legend("bottomright", legend=c("DF", "Bayes"), col=c("blue",'green'), lwd=2, text.width = 0.15, cex=0.7) list("adf"=roc_adf, "bayes"=roc_bayes)