Independence <- function(N=1000, T=1500, t=1000, DGP=1, lag=1, Kernel=1, logistic=FALSE, q=c(.5, 1, 2, 4), m=1, K=10, serial=TRUE, alpha=.05, a0=.1, a1=.5, b1=.5, c0=0, epsilon=1e-3, trim.type=1, quantiles=c(.2, .8), msd=1, replace=FALSE, lib.loc="~/R-Packages") { #################################################################################### # # # Nonparametric Entropy-based Tests of Independence between Stochastic Processes # # Fernandes, Marcelo and Breno Neri, 2007 # # # # Department of Economics, Queen Mary, University of London # # m.fernandes@qmul.ac.uk http://www.econ.qmul.ac.uk/staff/fernandes.htm # # # # Department of Economics, New York University # # breno.neri@nyu.edu http://homepages.nyu.edu/~bpn207 # # # # January, 2008 - Version 31 # # # #################################################################################### # # # Parameter: Default: Description: # # # # N 1000 Number Replications # # T 1500 Number of observations # # t 1000 Number of observations used (burn T-t) # # DGP 1 DGP # # lag 1 Lag in the serial independence test # # Kernel 1 (1) Gaussian, (2) Gamma, (3) Beta [only 1 for now] # # logistic FALSE Logistic transformation: TRUE or FALSE # # q q=c(.5, 1, 2, 4)Vector of entropic indices # # m 1 Block length (not for serial independence case) # # K 10 Truncation lag (not for serial independence case) # # serial TRUE Serial independence test: TRUE or FALSE (not ready)# # alpha .05 Significance level # # a0 .1 Parameters for the DGPs # # a1 .5 Parameters for the DGPs # # b1 .5 Parameters for the DGPs # # c0 0 Parameters for the DGPs # # epsilon 1e-3 Avoid division by zero or bondary problems # # trim.type 1 (1) Quantiles or (2) Multiples of standard deviat. # # quantiles c(.2, .8) Vector of trimming quatiles # # msd 1 Multiples of standard deviations (for trimming) # # replace FALSE Boostrap with replacement: TRUE or FALSE # # lib.loc "~/R-Packages" Library path # # # #################################################################################### # # # You may use this code only if you accept the conditions: # # (1) I am not liable for any problem caused by this code (i.e. you use it at your # # own risk); # # (2) You must give me credit in your papers where this code has been used. # # # #################################################################################### # Initializing library(zoo, lib.loc=lib.loc) library(quadprog, lib.loc=lib.loc) library(tseries, lib.loc=lib.loc) nq <- length(q) test.statistic <- array(0, c(2, N, nq)) q2 <- array(0, nq) size <- array(0, nq) # Weigh Function if(trim.type==1) w <- function(x) x > quantile(x, quantiles[1]) & x < quantile(x, quantiles[2]) else if(trim.type==2) w <- function(x) abs(x-mean(x)) < msd*sd(x) else stop("trim.type must be 1 or 2!") # Bandwidths bandwidthSilverman <- function(x) bw.nrd(x) #CVgamma <- function(h) #{ # h <<- h # term1 <- integrate(gammakernel2, epsilon, 2, subdivisions=500, stop.on.error=FALSE)[[1]] # term2 <- numeric() # for(j in 1:n) term2[j] <- gammakernel(x[-j], x[j], h) # return(term1 - 2*mean(term2)) #} #CVbeta <- function(h) #{ # h <<- h # term1 <- integrate(betakernel2, epsilon, 1-epsilon, subdivisions=500, stop.on.error=FALSE)[[1]] # term2 <- numeric() # for(j in 1:n) term2[j] <- betakernel(x[-j], x[j], h) # return(term1 - 2*mean(term2)) #} #bandwidthgamma <- function(x) return(optim(c(solve(n<<-length(x))), CVgamma, method="L-BFGS-B", lower=epsilon)[[1]]) #bandwidthbeta <- function(x) return(optim(c(solve(n<<-length(x))), CVbeta, method="L-BFGS-B", lower=epsilon)[[1]]) # Gaussian Kernel Estimation GK <- function(u) exp(-u^2/2) / sqrt(2*pi) # Gaussian Kernel Gf <- function(x, x0, bw) mean(GK((x-x0)/bw)) / bw # Density at x0 Gfxy <- function(x, y, x0, y0, bwx, bwy) mean(GK((x-x0)/bwx) * GK((y-y0)/bwy)) / (bwx*bwy) # Density at (x0, y0) # Asymmetric Kernels Estimation #gammakernel <- function(x, x0, bwx) mean(dgamma(x, shape=x0/bwx+1, scale=bwx)) # Density at x0 using Gamma Kernel #betakernel <- function(x, x0, bwx) mean(dbeta(x, shape1=x0/bwx+1, shape2=(1-x0)/bwx+1)) # Density at x0 using Beta Kernel #gammakernel2 <- function(x0) #{ # y <- numeric() # for(i in 1:length(x0)) y[i] <- gammakernel(x, x0[i], h)^2 # return(y) #} #betakernel2 <- function(x0) #{ # y <- numeric() # for(i in 1:length(x0)) y[i] <- betakernel(x, x0[i], h)^2 # return(y) #} #gammakernel.xy <- function(x, y, x0, y0, bwx, bwy) mean(dgamma(x, shape=x0/bwx+1, scale=bwx) * dgamma(y, shape=y0/bwy+1, #scale=bwy)) # Density at (x0, y0) using Gamma Kernel #betakernel.xy <- function(x, y, x0, y0, bwx, bwy) mean(dbeta(x, shape1=x0/bwx+1, shape2=(1-x0)/bwx+1) * dbeta(y, #shape1=y0/bwy+1, shape2=(1-y0)/bwy+1)) # Density at (x0, y0) using Beta Kernel # Test Statistic r <- function(x, y) { n <- length(x) W <- matrix(rep(wx*wy, nq), c(n, nq)) # if(Kernel==1) # { for(i in 1:n) W[i, q!=1] <- W[i, q!=1]*(1-(Gf(x, x[i], bwx)*Gf(y, y[i], bwy)/Gfxy(x, y, x[i], y[i], bwx, bwy))^(1-q[q!=1]))/(1-q[q!=1]) if(sum(q==1)>0) for(i in 1:n) W[i, q==1] <- W[i, q==1]*log(Gfxy(x, y, x[i], y[i], bwx, bwy)/(Gf(x, x[i], bwx)*Gf(y, y[i], bwy))) # } else if(Kernel==2) # { # bwx <- bandwidthgamma(x) # bwy <- bandwidthgamma(y) # for(i in 1:n) W[i, q!=1] <- W[i, q!=1]*(1-(gammakernel(x, x[i], bwx)*gammakernel(y, y[i], bwy)/gammakernel.xy(x, y, x[i], y[i], bwx, bwy))^(1-q[q!=1]))/(1-q[q!=1]) # if(sum(q==1)>0) for(i in 1:n) W[i, q==1] <- W[i, q==1]*log(gammakernel.xy(x, y, x[i], y[i], bwx, bwy)/(gammakernel(x, x[i], bwx)*gammakernel(y, y[i], bwy))) # } else if(Kernel==3) # { # bwx <- bandwidthbeta(x) # bwy <- bandwidthbeta(y) # for(i in 1:n) W[i, q!=1] <- W[i, q!=1]*(1-(betakernel(x, x[i], bwx)*betakernel(y, y[i], bwy)/betakernel.xy(x, y, x[i], y[i], bwx, bwy))^(1-q[q!=1]))/(1-q[q!=1]) # if(sum(q==1)>0) for(i in 1:n) W[i, q==1] <- W[i, q==1]*log(betakernel.xy(x, y, x[i], y[i], bwx, bwy)/(betakernel(x, x[i], bwx)*betakernel(y, y[i], bwy))) # } else stop("Kernel must be 1, 2 or 3!") return(apply(W, 2, mean)/sqrt(sigma2(wx, wy)/t)) } # Sigma2 Function if(serial) sigma2 <- function(wx, wy) var(wx) else sigma2 <- function(wx, wy) { n <- length(wx) mean.x <- mean(x) mean.y <- mean(y) mean.xy <- mean.x*mean.y mean2.x <- mean.x^2 mean2.y <- mean.y^2 gamma.x <- numeric() gamma.y <- numeric() gamma.xy <- numeric() temp <- numeric() for(k in -K:K) { gamma.x[k+K+1] <- cov(wx[max(1, 1-k):min(n-k, n)], wx[max(1+k, 1):min(n, n+k)]) gamma.y[k+K+1] <- cov(wy[max(1, 1-k):min(n-k, n)], wy[max(1+k, 1):min(n, n+k)]) temp[k+K+1] <- (gamma.x[k+K+1]*gamma.y[k+K+1]) + (gamma.x[k+K+1]+gamma.y[k+K+1])*(mean.x - mean.y)^2 # temp[k+K+1] <- gamma.x[k+K+1]*mean2.x + gamma.y[k+K+1]*mean2.y - 2*(gamma.x[k+K+1]+gamma.y[k+K+1])*mean.xy # gamma.xy[k+K+1] <- cov(wx[max(1, 1-k):min(n-k, n)], wy[max(1+k, 1):min(n, n+k)]) # temp[k+K+1] <- gamma.xy[k+K+1] + gamma.x[k+K+1]*mean2.x + gamma.y[k+K+1]*mean2.y - 2*(gamma.x[k+K+1]+gamma.y[k+K+1])*mean.xy } return(sum((1-abs(-K:K)/(K+1))*temp)) } if(serial) # Serial Independence { # Initializing Competitors hw.test.statistic <- array(0, c(2, N)) bds.test.statistic <- array(0, c(2, N, nq)) bds.q2 <- array(0, nq) bds.size <- array(0, nq) # Hong and White Test Statistic hw <- function(x, y) { n <- length(x) temp <- numeric() for(i in 1:n) temp[i] <- log(Gqxy(x, y, x[i], y[i], bwx, bwy)/(Gq(x, x[i], bwx)*Gq(y, y[i], bwy))) return(mean(temp)) } # Quartic Kernel Estimation (for Hong and White) GQ <- function(u) (15/16)*(1-u^2)^2*(abs(u)<=1) Gq <- function(x, x0, bw) mean(GQ((x-x0)/bw)) / bw # Density at x0 Gqxy <- function(x, y, x0, y0, bwx, bwy) mean(GQ((x-x0)/bwx) * GQ((y-y0)/bwy)) / (bwx*bwy) # Density at (x0, y0) # Monte Carlo for(mc in 1:N) { # DGPs e1 <- rnorm(T) if(DGP==1) x <- e1 # i.i.d. N(0, 1) else if(DGP==2) # AR(1) { x <- e1[1] for(i in 2:T) x[i] <- a0 + a1*x[i-1] + e1[i] } else if(DGP==3) # ARCH(1) { x <- sqrt(a0)*e1[1] for(i in 2:T) x[i] <- sqrt(a0 + a1*x[i-1]^2)*e1[i] } else if(DGP==4) # GARCH(1, 1) { h <- a0 x <- sqrt(h)*e1[1] for(i in 2:T) { h[i] <- a0 + a1*x[i-1]^2 + b1*h[i-1] x[i] <- sqrt(h[i])*e1[i] } } else if(DGP==5) # NLMA { x <- e1[1] x[2] <- e1[2] for(i in 3:T) x[i] <- a1*e1[i-1]*e1[i-2] + e1[i] } else if(DGP==6) # TAR { x <- e1[1] for(i in 2:T) if(x[i-1]<=1) x[i] <- a0 - a1*x[i-1] + e1[i] else x[i] <- a0 + a1*x[i-1] + e1[i] } else if(DGP==7) # Threshold GARCH(1, 1) { h <- a0 x <- sqrt(h)*e1[1] for(i in 2:T) { if(e1[i]<0) h[i] <- a0 + a1*x[i-1]^2 + b1*h[i-1] else h[i] <- a0 + a0*x[i-1]^2 + b1*h[i-1] x[i] <- sqrt(h[i])*e1[i] } } else if(DGP==8) # Bilinear AR(1) { x <- e1[1] for(i in 2:T) x[i] <- a0 + a1*x[i-1]*e1[i-1] + e1[i] } else if(DGP==9) # Fractional AR(1) { x <- e1[1] for(i in 2:T) x[i] <- a0 + a1*abs(x[i-1])^b1 + e1[i] } else if(DGP==10) # Sign AR(1) { x <- e1[1] for(i in 2:T) x[i] <- a0 + a1*sign(x[i-1]) + e1[i] } else if(DGP==11) # AR(1) Residual { x <- e0 <- rnorm(1) for(i in 1:T+1) x[i] <- a0 + a1*x[i-1] + e1[i-1] } else stop("DGP must be 1, 2, ..., or 11!") if(DGP==11) { x <- x[(T-t):T+1] ols <- lm(x[1:t+1]~x[1:t]) x <- ols$resid } else x <- x[(T-t+1):T] if(logistic) x <- (temp<-exp(x))/(1+temp) # Bootstrap bwx <- bandwidthSilverman(x[1:(t-lag)]) bwy <- bandwidthSilverman(x[(1+lag):t]) wx <- w(x[1:(t-lag)]) wy <- w(x[(1+lag):t]) test.statistic[1, mc,] <- r(x[1:(t-lag)], x[(1+lag):t]) hw.test.statistic[1, mc] <- hw(x[1:(t-lag)], x[(1+lag):t]) bds.test.statistic[1, mc,] <- c(bds.test(x, 2, q*sd(x))[[1]]) x <- sample(x, replace=replace) if(DGP==11) { y <- e0 for(i in 1:t+1) y[i] <- ols$coef%*%c(1, y[i-1]) + x[i-1] x <- lm(y[1:t+1]~y[1:t])$resid } wx <- w(x[1:(t-lag)]) wy <- w(x[(1+lag):t]) test.statistic[2, mc,] <- r(x[1:(t-lag)], x[(1+lag):t]) hw.test.statistic[2, mc] <- hw(x[1:(t-lag)], x[(1+lag):t]) bds.test.statistic[2, mc,] <- c(bds.test(x, 2, q*sd(x))[[1]]) } # Results for(i in 1:nq) { q2[i] <- quantile(test.statistic[2,, i], prob=1-alpha, names=F) size[i] <- mean(test.statistic[1,, i]>q2[i]) bds.q2[i] <- quantile(bds.test.statistic[2,, i], prob=1-alpha, names=F) bds.size[i] <- mean(bds.test.statistic[1,, i]>bds.q2[i]) } hw.q2 <- quantile(hw.test.statistic[2,], prob=1-alpha, names=F) hw.size <- mean(hw.test.statistic[1,]>hw.q2) print("- - - Report - - - Serial Independence Test - - -") print(paste("Monte Carlo Replications: ", N)) print(paste("Sample Size: ", T)) print(paste("Used Sample Size: ", t)) print(paste("DGP: ", DGP)) print(paste("Lags: ", lag)) print(paste("Logistic Transformation: ", logistic)) print(paste("Parameter a0: ", a0)) print(paste("Parameter a1: ", a1)) print(paste("Parameter b1: ", b1)) print(paste("Replacement: ", replace)) if(trim.type==1) { print("Trimming Quantiles:") print(quantiles) } else print(paste("Multiples of Standard Deviation: ", msd)) print("BDS Size:") print(bds.size) print(paste("Hong & White Size:", hw.size)) print("Fernandes & Neri Size:") print(size) } else # Independence { if(t/m!=floor(t/m)) stop("The number of observations t is not a multiple of the block length m!") # Monte Carlo for(mc in 1:N) { # DGPs e1 <- rnorm(T) e2 <- rnorm(T) if(DGP==1) # independent white noises { x <- e1 y <- e2 } else if(DGP==2) # independent AR(1)s { x <- e1[1] y <- e2[1] for(i in 2:T) { x[i] <- a0 + a1*x[i-1] + e1[i] y[i] <- a0 + a1*y[i-1] + e2[i] } } else if(DGP==3) # independent ARCH(1)s { x <- sqrt(a0)*e1[1] y <- sqrt(a0)*e2[1] for(i in 2:T) { x[i] <- sqrt(a0 + a1*x[i-1]^2)*e1[i] y[i] <- sqrt(a0 + a1*y[i-1]^2)*e2[i] } } else if(DGP==4) # ADL { x <- e1[1] y <- c0*x[1]+e2[1] for(i in 2:T) { x[i] <- a0 + a1*x[i-1] + e1[i] y[i] <- a0 + a1*y[i-1] + c0*x[i] + e2[i] } } else if(DGP==5) { x <- e1[1] y <- c0*x[1]*e2[1] for(i in 2:T) { x[i] <- a0 + a1*x[i-1] + e1[i] y[i] <- a0 + a1*y[i-1] + c0*x[i]*e2[i] } } else if(DGP==6) { x <- sqrt(a0)*e1[1] y <- sqrt(a0)*e2[1] for(i in 2:T) { x[i] <- sqrt(a0 + a1*x[i-1]^2)*e1[i] y[i] <- sqrt(a0 + a1*y[i-1]^2 + c0*x[i-1]^2)*e2[i] } } else if(DGP==7) #MA(1)s with Gaussian errors { x <- e1[1] y <- e2[1] for(i in 2:T) { x[i] <- a1*e1[i-1] + e1[i] y[i] <- a0*e2[i-1] + c0*x[i] + e2[i] } } else if(DGP==8) #MA(1)s with t(3) errors { x <- rt(T, df=3) y <- rt(T, df=3) for(i in 2:T) { x[i] <- a1*e1[i-1] + e1[i] y[i] <- a0*e2[i-1] + c0*x[i] + e2[i] } } else if(DGP==9) #MA(1)s with chisq(3)-1 errors { x <- rchisq(T, df=3)-1 y <- rchisq(T, df=3)-1 for(i in 2:T) { x[i] <- a1*e1[i-1] + e1[i] y[i] <- a0*e2[i-1] + c0*x[i] + e2[i] } } else stop("DGP must be 1, 2, ..., or 9!") x <- x[(T-t+1):T] y <- y[(T-t+1):T] if(logistic) x <- (temp<-exp(x))/(1+temp) if(logistic) y <- (temp<-exp(y))/(1+temp) # Bootstrap bwx <- bandwidthSilverman(x) bwy <- bandwidthSilverman(y) wx <- w(x) wy <- w(y) test.statistic[1, mc,] <- r(x, y) index.temp <- sample((1:(t-m+1)), t/m, replace=replace) index <- numeric() for(i in 1:length(index.temp)) index <- c(index, index.temp[i]:(index.temp[i]+m-1)) x <- x[index] y <- y[index] wx <- w(x) wy <- w(y) test.statistic[2, mc,] <- r(x, y) } # Results for(i in 1:nq) { q2[i] <- quantile(test.statistic[2,, i], prob=1-alpha, names=F) size[i] <- mean(test.statistic[1,, i]>q2[i]) } print("- - - Report - - - Independence Test - - -") print(paste("Monte Carlo Replications: ", N)) print(paste("Sample Size: ", T)) print(paste("Used Sample Size: ", t)) print(paste("DGP: ", DGP)) print(paste("Logistic Transformation: ", logistic)) print(paste("Parameter a0: ", a0)) print(paste("Parameter a1: ", a1)) print(paste("Parameter b1: ", b1)) print(paste("Parameter c0: ", c0)) print(paste("Replacement: ", replace)) print(paste("Block Length: ", m)) if(trim.type==1) { print("Trimming Quantiles:") print(quantiles) } else print(paste("Multiples of Standard Deviation: ", msd)) print("Fernandes & Neri Size:") print(size) } }