cut.i.fn=function(i,f.t,cen.t,cen.ind,xx,lin.pred,at.risk.mat,II.22,SS.2,tmp.1,bar.0,bar.xx,bar.zz.xx,p.num,Lambda.t){ # print(i) cut.pt=f.t[i] ct=sum(cen.t<=cut.pt) ss.1=sum(cen.ind*as.numeric(cen.t<=cut.pt)*(xx[,1]- bar.xx[,1]/bar.0)) II.12=numeric() for(j in 1:p.num){ II.12[j]=-sum(cen.ind*as.numeric(cen.t<=cut.pt)*(bar.zz.xx[,j]/bar.0- bar.xx[,1]*bar.xx[,j]/bar.0^2)) } tau.ind=c(1:ct,rep(ct,N-ct)) ss.1.i=cen.ind*(as.numeric(cen.t<=cut.pt)*(xx[,1]- bar.xx[,1]/bar.0))-(lin.pred*(xx[,1]*Lambda.t[tau.ind]-tmp.1[tau.ind]))+ as.vector(II.12%*%II.22%*%t(SS.2)) return(c(ss.1,ss.1.i)) } ##Main Function## coxcp=function(cen.t,cen.ind,xx,src.l,src.r,res.n,randomseed,alpha){ #cen.t: survival time (n*1 vector) #cen.ind: censoring indicator, 1: failure; 0: censored (n*1 vector) #xx: risk factors (the first column is the covariate associated with a change-point) (must be numerical numbers, n*p matrix) #src.l: search change-point interval left end #src.r: search change-point interval right end #res.n: number of resampling runs #randomseed:random variable generation seed for the resampling approach #alpha: test significance level library(MASS) library(survival) set.seed(randomseed) #first, sort the data by observed failure time# tmp.data=cbind(cen.t,cen.ind,xx) ind=order(cen.t) tmp.data=tmp.data[order(cen.t),] cen.t=tmp.data[,1] cen.ind=tmp.data[,2] k.num=dim(tmp.data)[2] xx=tmp.data[,3:k.num] N=length(cen.t) if(is.matrix(xx)==F){ xx=matrix(xx,nrow=N) } p.num=dim(xx)[2] print(dim(xx)) xnam=paste("x", 1:(p.num), sep="") tmp.data=data.frame(tmp.data) names(tmp.data)=c("cen.t","cen.ind",xnam) fmla=as.formula(paste("Surv(cen.t,cen.ind) ~ ", paste(xnam, collapse= "+"))) cox.null=coxph(fmla,data=tmp.data) alpha.null=as.numeric(cox.null$coef) II.22=cox.null$var lin.pred=exp(as.vector(xx%*%matrix(alpha.null,ncol=1))) at.risk.mat=matrix(NA,nrow=N,ncol=N) for(i in 1:N){ at.risk.mat[i,]=as.numeric(cen.t[i]>=cen.t) } bar.0=colSums(at.risk.mat*matrix(rep((lin.pred),N),ncol=N)) bar.xx=matrix(NA,nrow=N,ncol=p.num) bar.zz.xx=matrix(NA,nrow=N,ncol=p.num) for(i in 1:p.num){ bar.xx[,i]=colSums(at.risk.mat*matrix(rep((lin.pred*xx[,i]),N),ncol=N)) bar.zz.xx[,i]=colSums(at.risk.mat*matrix(rep((lin.pred*xx[,1]*xx[,i]),N),ncol=N)) } Lambda.t=cumsum(cen.ind/bar.0) SS.2=matrix(NA,nrow=N,ncol=p.num) tmp.1=cumsum(cen.ind*bar.xx[,1]/bar.0^2) for(i in 1:p.num){ tmp.2=cumsum(cen.ind*bar.xx[,i]/bar.0^2) SS.2[,i]=cen.ind*(xx[,i]-bar.xx[,i]/bar.0)-(lin.pred*(xx[,i]*Lambda.t-tmp.2)) } f.num=sum(cen.ind[cen.t>=src.l&cen.t<=src.r]) f.t=cen.t[cen.ind==1&cen.t>=src.l&cen.t<=src.r] score=t(sapply(1:f.num,cut.i.fn,f.t=f.t,cen.t=cen.t,cen.ind=cen.ind,xx=xx, lin.pred=lin.pred,at.risk.mat=at.risk.mat,II.22=II.22,SS.2=SS.2,tmp.1=tmp.1, bar.0=bar.0,bar.xx=bar.xx,bar.zz.xx=bar.zz.xx,p.num=p.num,Lambda.t=Lambda.t)) #the first column is the observed score fn at N cutpoints, the 2:(N+1) columns are for N subjects at cut points ss.tot=score[,1] ss=score[,2:(N+1)] rm(score) test.prof=abs(ss.tot) GG=matrix(rnorm(N*res.n),nrow=N) thres=apply(abs(ss%*%GG),2,max) change=f.t[test.prof==max(test.prof)] vv.tot=rowSums(ss^2) test.prof.2=ss.tot^2/vv.tot thres.2=apply(((ss%*%GG)^2/matrix(rep(vv.tot,res.n),ncol=res.n)),2,max) change.2=f.t[test.prof.2==max(test.prof.2)] plot(f.t,test.prof,type="o",ylim=c(0,max(test.prof,quantile(c(test.prof.2,thres,thres.2),0.99))),xlab="Time (weeks)",ylab="test profile") abline(h=quantile(thres,(1-alpha))) abline(v=change) legend("topright",legend=c("sup |S|","sup W"),col=1:2,lty=1:2) lines(f.t,test.prof.2,type="o",col=2,lty=2) abline(h=quantile(thres.2,(1-alpha)),col=2,lty=2) abline(v=change.2,col=2,lty=2) cat("sup|S|=", max(test.prof),paste(round(100*(1-alpha),0), "% threshold="),quantile(thres,(1-alpha)),"p-value=",mean(thres>=max(test.prof)),change,"\n") cat("\n") cat("sup W=",max(test.prof.2),paste(round(100*(1-alpha),0), "% threshold="),quantile(thres.2,(1-alpha)),"p-value=",mean(thres.2>=max(test.prof.2)),change.2,"\n") } ##example 1 set.seed(100) N=300 true.beta=-0.5 true.theta=1.5 true.gamma=-1 change.point=3 t.end=8 zz=rbinom(N,1,0.5) xx=zz tmp=exp((true.beta+true.theta)*zz)*change.point*0.5 ##cumulative hazard=t u.tmp=runif(N,0,1) tt=numeric() ind.tmp=exp(-tmp)>u.tmp tt[ind.tmp]=(1/0.5*(-log(u.tmp)-tmp)/(exp(true.beta*zz))+change.point)[ind.tmp] tt[!ind.tmp]=1/0.5*(-log(u.tmp)/exp((true.beta+true.theta)*zz))[!ind.tmp] cc=pmin(runif(N,min=0,max=30),rep(t.end,N)) cen.ind=as.numeric(tt<=cc) cen.t=pmin(tt,cc) coxcp(cen.t,cen.ind,xx,0,6,10000,1,0.05) ##example 2 set.seed(100) N=300 true.beta=-0.5 true.theta=1 true.gamma=-1 change.point=3 t.end=8 zz=rbinom(N,1,0.5) xx=rnorm(N,1,0.5) tmp=exp((true.beta+true.theta)*zz+true.gamma*xx)*change.point*0.5 ##cumulative hazard=t u.tmp=runif(N,0,1) tt=numeric() ind.tmp=exp(-tmp)>u.tmp tt[ind.tmp]=(1/0.5*(-log(u.tmp)-tmp)/(exp(true.beta*zz+true.gamma*xx))+change.point)[ind.tmp] tt[!ind.tmp]=1/0.5*(-log(u.tmp)/exp((true.beta+true.theta)*zz+true.gamma*xx))[!ind.tmp] cc=pmin(runif(N,min=0,max=30),rep(t.end,N)) cen.ind=as.numeric(tt<=cc) xx=cbind(zz,xx) cen.t=pmin(tt,cc) coxcp(cen.t,cen.ind,xx,0,6,10000,1,0.01) ##example 3 brain=read.csv("C:\\Research\\Change Point\\BRAIN\\brain.csv",header=TRUE)##Note that you may need to change the path to direct to where you store the data print(dim(brain)) print(names(brain)) #[1] 222 13 # [1] "treat" "resect75" "age" "interval" "PS" "race" "local" "male" "nitro" "weeks" #[11] "event" "path" "grade" brain$age=brain$age/10 #brain$treat=1-brain$treat brain$grade=1-brain$grade resampling=10000 brain=brain[brain$path==1,] brain=brain[order(brain$weeks),] brain=brain[rowSums(is.na(brain))==0,] print(dim(brain)) N=length(brain[,1]) cen.t=brain$weeks cen.ind=brain$event zz=brain$treat p.num=8 xx=cbind(zz, brain$resect75,brain$age,brain$race,brain$local,brain$nitro,brain$grade,brain$PS) coxcp(cen.t,cen.ind,xx,0,80,10000,1,0.05)