########################################################################## ## Chapter 1, section 1.11 ########################################################################## ####################### ### The aidssi data set ####################### library(mstate) # also loads the survival package data(aidssi) aidssi[c(14,3,15,8),] # four example individuals ###################################### ### Define time and status information ###################################### Surv(time=time, event=(status!=0)) Surv(time=time, event=(cause!="event-free")) ######################## ### Perform calculations ######################## KM.curve <- survfit(Surv(time, status!=0)~1, data=aidssi) # Kaplan-Meier fit.Cox <- coxph(Surv(time, status!=0)~ccr5, data=aidssi) # Cox model ###################### ### Summary of outcome ###################### KM.curve # concise summary of Kaplan-Meier estimate summary(KM.curve) # more detailed summary summary(KM.curve, times=c(5,10)) # detailed summary at specific time points ## confidence intervals on log-log scale summary(survfit(Surv(time, status!=0)~1, data=aidssi, conf.type="log-log"), times=c(5,10)) ## plot Kaplan-Meier par(las=1) ## as survival function plot(KM.curve, mark.time=FALSE, conf.int=FALSE, xlab="time since HIV infection (years)", ylab="overall cumulative incidence") ## FIGURE 1.22, as cumulative incidence plot(KM.curve, fun="event", mark.time=FALSE, conf.int=FALSE, xlab="time since HIV infection (years)", ylab="overall cumulative incidence") ## estimate of cumulative hazard plot(KM.curve, fun="cumhaz", mark.time=FALSE, conf.int=FALSE, xlab="time since HIV infection (years)", ylab="overall cumulative incidence") fit.Cox # concise summary of Cox model summary(fit.Cox) # more detailed summary cox.zph(fit.Cox) # test for proportionality based on scaled Schoenfeld residuals ## prediction based on Cox model summary(survfit(fit.Cox), times=c(5,10)) # "average individual" new.indiv <- data.frame(ccr5=c("WW","WM")) # create specific values summary(survfit(fit.Cox, newdata=new.indiv)[1], times=c(5,10)) # first individual in new.indiv (WW) summary(survfit(fit.Cox, newdata=new.indiv)[2], times=c(5,10)) # second individual (WM) plot(survfit(fit.Cox, newdata=new.indiv)) # predicted survival curves ################# ### Log-rank test ################# survdiff(Surv(time,status!=0)~ccr5, data=aidssi) KM.ccr5 <- survfit(Surv(time,status!=0)~ccr5, data=aidssi) # Kaplan-Meier per CCR5 value summary(KM.ccr5[1]) # summary for reference value (WW) plot(KM.ccr5) # plot for both values plot(KM.ccr5[1]) # plot for WW group rm(KM.curve, fit.Cox, KM.ccr5) ########################################################################### ## chapter 2, section 2.9 ########################################################################### ########################## ## The Aalen-Johansen form ########################## ### The survival package ######################## ## compute estimates csiSurv <- survfit(Surv(time=time,event=status,type="mstate")~1, data=aidssi) ## alternative event specification, via factor variable "cause" survfit(Surv(time=time,event=relevel(cause,"event-free"))~1, data=aidssi) ## estimates at specific time points summary(csiSurv, times=seq(2,10,by=2)) ## confidence intervals csiSurv$lower csiSurv$upper ## FIGURE 2.4 par(las=1) plot(survfit(Surv(time,status==2)~1,data=aidssi), xlim=c(0,13), mark.time=FALSE, col="grey50", conf.int=FALSE, xlab="time since HIV infection",xaxs="i") lines(survfit(Surv(time,status==1)~1,data=aidssi), fun="event", mark.time=FALSE, col="grey50", conf.int=FALSE) lines(csiSurv[2], fun="identity", mark.time=FALSE, lwd=2, conf.int=FALSE) lines(csiSurv[1], mark.time=FALSE, lwd=2, conf.int=FALSE) text(8.9, 0.18, "time to AIDS") text(8.9, 0.75, "time to SI") ## FIGURE 2.5 plot(survfit(Surv(time,status>0)~1, data=aidssi), xlim=c(0,13), mark.time=FALSE, conf.int=FALSE, xlab="time since HIV infection", fun="event", lwd=2, xaxs="i") lines(csiSurv[1], fun="event", mark.time=FALSE, lwd=2, conf.int=FALSE) text(8.9, 0.15, "time to AIDS") text(8.9, 0.45, "time to SI") ## create data frame with confidence intervals on the two log scales and complementary log-log scales, as well ## as on the plain scale csiSurvCI <- data.frame(est=csiSurv$prev[,2]) csiSurvCI$time <- csiSurv$time csiSurvCI$std.err <- csiSurv$std.err[,2] csiSurvCI$lower.log <- pmax(0,1-(1-csiSurvCI$est)* exp(qnorm(0.975)*csiSurvCI$std.err/(1-csiSurvCI$est))) csiSurvCI$upper.log <- 1-(1-csiSurvCI$est)* exp(-qnorm(0.975)*csiSurvCI$std.err/(1-csiSurvCI$est)) csiSurvCI$lower.log2 <- csiSurvCI$est* exp(-qnorm(0.975)*csiSurvCI$std.err/csiSurvCI$est) csiSurvCI$upper.log2 <- pmin(1,csiSurvCI$est* exp(qnorm(0.975)*csiSurvCI$std.err/csiSurvCI$est)) csiSurvCI$lower.loglog <- 1-(1-csiSurvCI$est)^( exp(qnorm(0.975)*csiSurvCI$std.err/((1-csiSurvCI$est)*log(1-csiSurvCI$est)))) csiSurvCI$upper.loglog <- 1-(1-csiSurvCI$est)^( exp(-qnorm(0.975)*csiSurvCI$std.err/((1-csiSurvCI$est)*log(1-csiSurvCI$est)))) csiSurvCI$lower.loglog2 <- csiSurvCI$est^( exp(-qnorm(0.975)*csiSurvCI$std.err/(csiSurvCI$est*log(csiSurvCI$est)))) csiSurvCI$upper.loglog2 <- csiSurvCI$est^( exp(qnorm(0.975)*csiSurvCI$std.err/(csiSurvCI$est*log(csiSurvCI$est)))) csiSurvCI$lower.lin <- pmax(0,csiSurvCI$est-qnorm(0.975)*csiSurvCI$std.err) csiSurvCI$upper.lin <- pmin(1,csiSurvCI$est+qnorm(0.975)*csiSurvCI$std.err) ## FIGURE 2.8 par(las=1) plot(c(0,est)~c(0,time), data=csiSurvCI, lwd=3, xlim=c(0,3), xaxs="i", ylab="", xlab="time since HIV infection (years)", ylim=c(0,0.10), type="s") lines(upper.log~time, data=csiSurvCI, type="s", lty="dotted", lwd=3) lines(lower.log~time, data=csiSurvCI, type="s", lty="dotted", lwd=3) lines(upper.log2~time, data=csiSurvCI, type="s", lty="dotted", lwd=3, col="grey70") lines(lower.log2~time, data=csiSurvCI, type="s", lty="dotted", lwd=3, col="grey70") lines(upper.loglog~time, data=csiSurvCI, type="s") lines(lower.loglog~time, data=csiSurvCI, type="s") lines(upper.loglog2~time, data=csiSurvCI, type="s", col="grey70") lines(lower.loglog2~time, data=csiSurvCI, type="s", col="grey70") lines(upper.lin~time, data=csiSurvCI, type="s", lty="dashed") lines(lower.lin~time, data=csiSurvCI, type="s", lty="dashed") ### the etm package ################### library(etm) ## compute estimates csiEtm <- etmCIF(Surv(time=time,event=status!=0)~1, data=aidssi, etype=status, failcode=c(1,2)) ## brief summary, not very useful csiEtm ## summary of estimates at six time points (error if options(stringsAsFactors=FALSE) summary(csiEtm) ## summary at all time points if assigned to object summEtm <- summary(csiEtm) ## detailed information on first event type (AIDS) summEtm[[1]][["CIF 1"]] ## alternative event specification, via factor variable "cause" summEtm <- summary(etmCIF(Surv(time=time,event=status!=0)~1, data=aidssi, etype=cause, failcode=c("AIDS","SI"))) summEtm[[1]][["CIF AIDS"]] ## confidence intervals on another scale, e.g. summary(csiEtm, ci.fun="log") ## another way to obtain the estimate trprob(csiEtm[[1]], tr.choice="0 2") ## estimate and variance at specific time points trprob(csiEtm[[1]], tr.choice="0 2", timepoints=seq(2,10,by=2)) trcov(csiEtm[[1]], tr.choice="0 2", timepoints=seq(2,10,by=2)) ## FIGURE 2.4 par(las=1) plot(survfit(Surv(time,status==2)~1, data=aidssi), xlim=c(0,13), mark.time=FALSE, col="grey50", conf.int=FALSE, xlab="time since HIV infection", xaxs="i") lines(survfit(Surv(time,status==1)~1, data=aidssi), fun="event", mark.time=FALSE, col="grey50", conf.int=FALSE) with(csiEtm[[1]], lines(time,1-est[1,3,], lwd=2)) # plot.etm cannot use the survival scale lines(csiEtm[[1]], tr.choice="0 1", lwd=2) text(8.9, 0.18, "time to AIDS") text(8.9, 0.75, "time to SI") ## FIGURE 2.5 plot(survfit(Surv(time,status>0)~1, data=aidssi), xlim=c(0,13), mark.time=FALSE, conf.int=FALSE, xlab="time since HIV infection", fun="event", lwd=2, xaxs="i") lines(csiEtm[[1]], tr.choice="0 1", lwd=2) text(8.9, 0.15, "time to AIDS") text(8.9, 0.45, "time to SI") ## create data frame with estimate and confidence interval on different scales csiEtmCI <- data.frame(est=trprob(csiEtm[[1]], tr.choice="0 2")) csiEtmCI$time <- as.numeric(row.names(csiEtmCI)) csiEtmCI$std.err <- sqrt(trcov(csiEtm[[1]], tr.choice="0 2")) csiEtmCI$lower.log <- pmax(0,1-(1-csiEtmCI$est)* exp(qnorm(0.975)*csiEtmCI$std.err/(1-csiEtmCI$est))) csiEtmCI$upper.log <- 1-(1-csiEtmCI$est)* exp(-qnorm(0.975)*csiEtmCI$std.err/(1-csiEtmCI$est)) csiEtmCI$lower.log2 <- csiEtmCI$est* exp(-qnorm(0.975)*csiEtmCI$std.err/csiEtmCI$est) csiEtmCI$upper.log2 <- pmin(1,csiEtmCI$est* exp(qnorm(0.975)*csiEtmCI$std.err/csiEtmCI$est)) csiEtmCI$lower.loglog <- 1-(1-csiEtmCI$est)^( exp(qnorm(0.975)*csiEtmCI$std.err/((1-csiEtmCI$est)*log(1-csiEtmCI$est)))) csiEtmCI$upper.loglog <- 1-(1-csiEtmCI$est)^( exp(-qnorm(0.975)*csiEtmCI$std.err/((1-csiEtmCI$est)*log(1-csiEtmCI$est)))) csiEtmCI$lower.loglog2 <- csiEtmCI$est^( exp(-qnorm(0.975)*csiEtmCI$std.err/(csiEtmCI$est*log(csiEtmCI$est)))) csiEtmCI$upper.loglog2 <- csiEtmCI$est^( exp(qnorm(0.975)*csiEtmCI$std.err/(csiEtmCI$est*log(csiEtmCI$est)))) csiEtmCI$lower.lin <- pmax(0,csiEtmCI$est-qnorm(0.975)*csiEtmCI$std.err) csiEtmCI$upper.lin <- pmin(1,csiEtmCI$est+qnorm(0.975)*csiEtmCI$std.err) ## FIGURE 2.8 par(las=1) plot(c(0,est)~c(0,time), data=csiEtmCI, lwd=3, xlim=c(0,3), xaxs="i", ylab="", xlab="time since HIV infection (years)", ylim=c(0,0.10), type="s") lines(upper.log~time, data=csiEtmCI, type="s", lty="dotted", lwd=3) lines(lower.log~time, data=csiEtmCI, type="s", lty="dotted", lwd=3) lines(upper.log2~time, data=csiEtmCI, type="s", lty="dotted", lwd=3, col="grey70") lines(lower.log2~time, data=csiEtmCI, type="s", lty="dotted", lwd=3, col="grey70") lines(upper.loglog~time, data=csiEtmCI, type="s") lines(lower.loglog~time, data=csiEtmCI, type="s") lines(upper.loglog2~time, data=csiEtmCI, type="s", col="grey70") lines(lower.loglog2~time, data=csiEtmCI, type="s", col="grey70") lines(upper.lin~time, data=csiEtmCI, type="s", lty="dashed") lines(lower.lin~time, data=csiEtmCI, type="s", lty="dashed") ## In version >= 0.6-2 there is a function ggtransfo that transforms estimates to format for use in ggplot2 csiEtm.gg <- ggtransfo(csiEtm[[1]]) head(csiEtm.gg) library(ggplot2) ggplot(csiEtm.gg, aes(time,P,group=trans)) + geom_step(aes(colour=trans)) + geom_rect(aes(xmin=time,xmax=timemax,ymin=lower,ymax=upper,fill=trans), alpha=0.5) ################################## ## The weighted product limit form ################################## library(mstate) # if not attached already ## create data set with weights aidssi.w <- crprep("time", "cause", data=aidssi, trans=c("AIDS","SI"), cens="event-free", id="patnr", keep="ccr5") ## alternative specification by values instead of column names aidssi.w <- with(aidssi, crprep(time, cause, trans=c("AIDS","SI"), cens="event-free", id=patnr, keep=ccr5)) ## calculate PL-form for SI as event survfit(Surv(Tstart,Tstop,status=="SI")~1, data=aidssi.w, weights=weight.cens, subset=failcode=="SI") ## both end points at the same time csiPL <- survfit(Surv(Tstart,Tstop,status==failcode)~failcode, data=aidssi.w, weights=weight.cens) ## summary summary(csiPL["failcode=SI"], times=seq(2,10,by=2)) ## select by number of stratum csiPL$strata summary(csiPL[2], times=seq(2,10,by=2)) ## FIGURE 2.9, comparison of CI's: Gaynor/Betensky and PL-based plot(csiSurv[2], mark.time=FALSE, lwd=3, xlim=c(0,13), xaxs="i", xlab="time since HIV infection (years)", ylim=c(0,0.44), conf.int=FALSE) lines(csiSurv[2], fun="event", mark.time=FALSE, conf.int="only", lty="solid", lwd=4, col="grey80") lines(csiPL[2], fun="event", mark.time=FALSE, conf.int="only", lty="solid", lwd=1) ## The log scale as used in survfit has not been implemented in the plot or lines function in etm ## We can use the CI as calculated and stored in csiEtmCI, gives same result as lines(upper.log~time, data=csiEtmCI, type="s", lwd=1, col="red") lines(lower.log~time, data=csiEtmCI, type="s", lwd=1, col="red") ########################################### ## Using the finegray function from survfit ########################################### aidssi.w2 <- finegray(Surv(time,status,type="mstate")~., data=aidssi) aidssi.w2 <- finegray(Surv(time,relevel(cause,"event-free"))~., aidssi, etype="AIDS") aidssi.w2 <- finegray(Surv(time,status,type="mstate")~., data=aidssi, etype=2) aidssi.w2 <- finegray(Surv(time,relevel(cause,"event-free"))~., aidssi, etype="SI") ################# ## Log-rank tests ################# ## log-rank test on cause-specific hazards survdiff(Surv(time,status==1)~ccr5, data=aidssi) # AIDS survdiff(Surv(time,status==2)~ccr5, data=aidssi) # SI ## same result based on score test lr.test <- coxph(Surv(time,status==1)~ccr5, data=aidssi)$score c(lr.test,1-pchisq(lr.test,1)) lr.test <- coxph(Surv(time,status==2)~ccr5, data=aidssi)$score c(lr.test,1-pchisq(lr.test,1)) ## log rank test on subdistribution hazard ## cmprsk package install.packages("cmprsk") # if not yet installed library(cmprsk) with(aidssi, cuminc(time, status, group=ccr5)$Tests) ## via estimate of subdistribution hazard, with CCR5 specific weights aidssi.wCCR <- crprep("time", "cause", data=aidssi, trans=c("AIDS","SI"), cens="event-free", strata="ccr5") ## test based on score test from proportional subdistribution hazards model test.AIDS <- coxph(Surv(Tstart,Tstop,status=="AIDS")~ccr5, data=aidssi.wCCR, weights=weight.cens, subset=failcode=="AIDS") test.SI <- coxph(Surv(Tstart,Tstop,status=="SI")~ccr5, data=aidssi.wCCR, weights=weight.cens, subset=failcode=="SI") ## score test statistic and p-value c(test.AIDS$score, 1-pchisq(test.AIDS$score,1)) # AIDS c(test.SI$score, 1-pchisq(test.SI$score,1)) # SI ## FIGURE 2.11: plot of cause-specific and subdistribution cumulative hazard to SI by CCR5 par(mfrow=c(1,2)) plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"&count==1)), mark.time=FALSE, lwd=c(2,2), lty=c(1,2), fun="cumhaz", xlim=c(0,13), ylim=c(0,0.7), xlab="time since HIV infection (years)") plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"), weights=weight.cens), mark.time=FALSE, lwd=c(2,2), lty=c(1,2), fun="cumhaz", xlim=c(0,13), ylim=c(0,0.7), xlab="time since HIV infection (years)") par(mfrow=c(1,1)) rm(csiSurvCI,csiEtmCI,csiEtm.gg,test.AIDS,test.SI,lr.test) ########################################################################### ## Chapter 3, Section 3.7 ########################################################################### ## We first need to create two extra columns for the state "3 AIDS/SI" in FIGURE 3.3 data(aidssi2) aidssi2 <- aidssi2[,c(1,2,3,4,5,6,5,6,7:10)] # duplicate info on SI state names(aidssi2)[c(7,8)] <- c("siaids.time","siaids.stat") # rename tmp.si.aids <- aidssi2$si.time < aidssi2$aids.time # SI switch before AIDS? ## In "1 AIDS" and "2 SI", we need to set the status column to zero if the other happened first aidssi2 <- within(aidssi2, { siaids.stat <- aids.stat*si.stat # 1 if both occurred siaids.time <- pmax(aids.time,si.time) aids.stat[tmp.si.aids] <- 0 # change to 0 if SI occurs first si.stat[!tmp.si.aids] <- 0 # change to 0 if AIDS occurs first }) ## If individuals had an SI switch before AIDS diagnosis, they never reached state "1 AIDS". Therefore, the ## column aids.time needs to be set at the end of the follow-up. The same holds for the six individuals that ## developed AIDS before having a switch to SI aidssi2$aids.time.orig <- aidssi2$aids.time aidssi2$si.time.orig <- aidssi2$si.time aidssi2$aids.time[aidssi2$aids.stat==0] <- aidssi2$death.time[aidssi2$aids.stat==0] aidssi2$si.time[aidssi2$si.stat==0] <- aidssi2$death.time[aidssi2$si.stat==0] rm(tmp.si.aids) ################## ## The etm package ################## library(etm) ## Specify allowed transitions trans.etm <- matrix(FALSE,nrow=5,ncol=5) trans.etm[rbind( c(1,2), c(1,3), c(2,4), c(2,5), c(3,4) ,c(4,5) )] <- TRUE trans.etm dimnames(trans.etm) <- list(c("HIV","AIDS", "SI", "AIDS/SI", "death"), c("HIV","AIDS", "SI", "AIDS/SI", "death")) # not required to create dimnames ## Create data set aidssi.etm <- etmprep(time=c(NA,"aids.time","si.time","siaids.time","death.time"), status=c(NA,"aids.stat","si.stat","siaids.stat","death.stat"), data=aidssi2, tra=trans.etm, cens.name="cens", state.names=c("HIV", "AIDS","SI","AIDS/SI", "death"), start=list(time=aidssi2$entry.time,state=rep("HIV",nrow(aidssi2)))) ## The computations ## First cumulative transition hazards library(mvna) # best estimated via mvna package NA.etm <- mvna(aidssi.etm, state.names=c("HIV", "AIDS","SI", "AIDS/SI", "death"), tra=trans.etm, cens.name="cens") summary(NA.etm) ## alternative, only for each transition separately library(survival) CumHaz.2to3 <- survfit(coxph(Surv(entry,exit,to=="AIDS/SI")~1, data=subset(aidssi.etm,from=="SI"))) ## Transition probabilities Prob.etm <- etm(aidssi.etm, state.names=c("HIV", "AIDS","SI", "AIDS/SI", "death") , tra=trans.etm, cens.name="cens", s=0) ## Transition probabilities starting at 4 years Prob.etm.s4 <- etm(aidssi.etm, c("HIV", "AIDS","SI", "AIDS/SI", "death") , trans.etm, "cens", s=4) ## Summarizing results via summary.etm summary(Prob.etm, ci.fun="log") # summary at six time points Summ.Prob.etm <- summary(Prob.etm, ci.fun="log") # summary at all time points Summ.Prob.etm[["SI AIDS/SI"]][seq(100,400,by=100),] Summ.Prob.etm[["HIV HIV"]][seq(100,400,by=100),] ## Summarizing results via trprob.etm and trcov.etm ## transition probability for a non-direct transition, e.g. from HIV to death Prob.hiv.death <- trprob(Prob.etm, tr.choice=c("HIV death")) Prob.hiv.death[seq(100,400,by=100)] # just four values # Create data frame with the estimates Prob.hiv.death <- data.frame(time=as.numeric(names(Prob.hiv.death)), prob=Prob.hiv.death) row.names(Prob.hiv.death) <- 1:nrow(Prob.hiv.death) tmp <- trcov(Prob.etm, tr.choice=c("HIV death")) Prob.hiv.death$lower.ci <- pmax(0,1-(1-Prob.hiv.death$prob)* exp(qnorm(0.975)*sqrt(tmp)/(1-Prob.hiv.death$prob))) Prob.hiv.death$upper.ci <- 1-(1-Prob.hiv.death$prob)* exp(-qnorm(0.975)*sqrt(tmp)/(1-Prob.hiv.death$prob)) ## Probability to live with AIDS, combined state Prob.hiv.aids <- trprob(Prob.etm, "HIV AIDS") + trprob(Prob.etm, "HIV AIDS/SI") Prob.hiv.aids <- data.frame(time=as.numeric(names(Prob.hiv.aids)),prob=Prob.hiv.aids) row.names(Prob.hiv.aids) <- 1:nrow(Prob.hiv.aids) ## Confidence intervals, using formula (1.20) Var.hiv.aids <- trcov(Prob.etm, "HIV AIDS") + trcov(Prob.etm, "HIV AIDS/SI") + 2*trcov(Prob.etm, c("HIV AIDS","HIV AIDS/SI")) Prob.hiv.aids$lower.ci <- pmax(0,1-(1-Prob.hiv.aids$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob))) Prob.hiv.aids$upper.ci <- 1-(1-Prob.hiv.aids$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob)) ## Plotting the results ## Directly via plot command from etm package. Aalen-Johansen estimate in FIGURE 3.9 is obtained via plot(Prob.etm, tr.choice=c("HIV death"), lwd=2, conf.int=TRUE, ci.fun="log", legend=FALSE, ylim=c(0,0.75)) # we add the confidence intervals on the other log scale: lines(lower.ci~time, data=Prob.hiv.death, col="red") lines(upper.ci~time, data=Prob.hiv.death, col="red") ## FIGURE 3.4: Nelson-Aalen estimates plot(NA.etm, tr.choice=c("HIV SI","HIV AIDS"), lwd=c(3,3), lty=c(2,1), col=c(rep("black",2)), ylim=c(0,7), xlim=c(0,13), xlab="time since HIV infection (years)", ylab="cumulative transition hazard", legend=FALSE) lines(NA.etm, tr.choice=c("SI AIDS/SI","AIDS AIDS/SI"), lwd=c(2,2), lty=c(2,1), col=c(rep("black",2))) lines(NA.etm, tr.choice=c("AIDS/SI death"), col=c("grey50")) lines(NA.etm, tr.choice=c("AIDS death"), lwd=2, col=c("grey50")) legend(1,6, c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"), lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n") # the same estimates are obtained using the survival package, for example lines(CumHaz.2to3, fun="cumhaz", col="red", mark.time=FALSE, conf.int=FALSE) ## FIGURE 3.5: transition probabilities for all direct transitions plot(Prob.etm , c("HIV SI","HIV AIDS"), lwd=c(3,3), lty=c(2,1), col=c(rep("black",2)), legend=FALSE, xlim=c(0,13), xlab="time since HIV infection (years)", ylab="transition probability") lines(Prob.etm, tr.choice=c("SI AIDS/SI","AIDS AIDS/SI"), lwd=c(2,2), lty=c(2,1), col=c(rep("black",2))) lines(Prob.etm, tr.choice=c("AIDS/SI death"), col=c("grey50")) lines(Prob.etm, tr.choice=c("AIDS death"), lwd=2, col=c("grey50")) legend(7,0.65,c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"), lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n") ## FIGURE 3.6 is similar, but uses Prob.etm.s4 ## FIGURE 3.7 plot(Prob.etm, c("HIV AIDS"), lty=2, lwd=3, legend=FALSE, xlim=c(0,13), xlab="time since HIV infection (years)", ylab="transition probability", ylim=c(0,0.6)) lines(Prob.etm, tr.choice=c("HIV SI"), lwd=3, lty=3) lines(Prob.etm, tr.choice=c("HIV AIDS/SI"), lwd=3, col="grey50") lines(Prob.etm,tr.choice=c("HIV death"), col="black") lines(Prob.etm,tr.choice=c("HIV HIV"),lwd=3, col="black") legend(0.5,0.5,c("0 HIV","1 AIDS","2 SI","3 AIDS/SI","4 death"), lwd=c(3,3,3,3,1), lty=c(1,2,3,1,1), col=c(rep("black",3),"grey50","black"), bty="n") ## FIGURE 3.8 (note: CI in FIGURE 3.8 slightly different, were based on the log scale in formula (1.23)) plot(prob~time, data=Prob.hiv.aids, type="s", xlab="time since HIV infection (years)", ylab="probability to be alive with AIDS", xlim=c(0,13), ylim=c(0,0.22), xaxs="i", lwd=3) lines(lower.ci~time, data=Prob.hiv.aids, type="s", lty=3) lines(upper.ci~time, data=Prob.hiv.aids, type="s", lty=3) lines(Prob.etm, tr.choice=c("HIV AIDS"), lwd=2) ## FIGURE 3.9: compare Kaplan-Meier and Aalen-Johansen estimator ## We create some temporary variables that allow us to make a gray confidence band tmp <- survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2) x1 <- rep(tmp$time, each=2)[-1] x2 <- rev(rep(tmp$time, each=2)[-1]) y1 <- 1-rep(tmp$lower,each=2) y1 <- y1[-length(y1)] y2 <- 1-rep(tmp$upper,each=2) y2 <- rev(y2[-length(y2)]) ## Creating the plot par(las=1,mar=c(5.1,7.1,1,1)) plot(1,1, xlim=c(0, 13), xlab="time since HIV infection (years)", ylab="transition probability to death", ylim=c(0,0.7), type="n", xaxs="i") polygon(c(x1,x2), c(y1,y2), density=-1, col=rgb(0.8,0.8,0.8, alpha=0.7), border=NA) lines(survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2), fun="event", lwd=2, mark.time=FALSE, col="grey50", conf=FALSE) lines(prob~time, data=Prob.hiv.death, lwd=2, type="s") lines(lower.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s") lines(upper.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s") rm(x1,x2,y1,y2,tmp) ## FIGURE 3.11: State-AIDS entry distribution # First redefine the model trans.etm.AIDS <- matrix(FALSE,nrow=4,ncol=4) trans.etm.AIDS[rbind( c(1,2), c(1,3), c(3,4) )] <- TRUE dimnames(trans.etm.AIDS) <- list(c("HIV","AIDS", "SI", "AIDS/SI"), c("HIV","AIDS", "SI", "AIDS/SI")) # Computation of transition probabilities (similar as above) aidssi.etm.AIDS <- subset(aidssi.etm, !((from=="AIDS")|(from=="AIDS/SI")|(to=="death"))) Prob.etm.AIDS <- etm(aidssi.etm.AIDS, state.names=c("HIV", "AIDS","SI", "AIDS/SI") , tra=trans.etm.AIDS, cens.name="cens", s=0) Prob.hiv.aids2 <- trprob(Prob.etm.AIDS, "HIV AIDS") + trprob(Prob.etm.AIDS, "HIV AIDS/SI") Prob.hiv.aids2 <- data.frame(time=as.numeric(names(Prob.hiv.aids2)),prob=Prob.hiv.aids2) row.names(Prob.hiv.aids2) <- 1:nrow(Prob.hiv.aids2) Var.hiv.aids <- trcov(Prob.etm.AIDS, "HIV AIDS") + trcov(Prob.etm.AIDS, "HIV AIDS/SI") + 2*trcov(Prob.etm.AIDS, c("HIV AIDS","HIV AIDS/SI")) Prob.hiv.aids2$lower.ci <- pmax(0,1-(1-Prob.hiv.aids2$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob))) Prob.hiv.aids2$upper.ci <- 1-(1-Prob.hiv.aids2$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob)) ## Plotting plot(prob~time, data=Prob.hiv.aids2, type="s", xlab="time since HIV infection (years)", ylab="probability to develop AIDS", xlim=c(0,13), ylim=c(0,0.8), xaxs="i", lwd=3) lines(prob~time, data=Prob.hiv.aids,type="s", col="grey") # add Kaplan-Meier lines(survfit(Surv(aids.time.orig, pmax(aids.stat,siaids.stat))~1, data=aidssi2), fun="event", mark.time=FALSE, col="red") ##################### ## The msSurv package ##################### ## code to download and install the graph package source("http://bioconductor.org/biocLite.R") biocLite("graph") library(msSurv) ## Specify allowed transitions Nodes <- c("HIV","AIDS","SI","AIDS/SI","death") Edges <- list("HIV" = list(edges=c("AIDS","SI")), "AIDS" = list(edges=c("AIDS/SI","death")), "SI" = list(edges=c("AIDS/SI")), "AIDS/SI" = list(edges=c("death")), "death" = list(edges=NULL)) ms.Diag <- new("graphNEL", nodes=Nodes, edgeL=Edges, edgemode="directed") ## Create data set, use aidssi.etm as basis aidssi.msSurv <- aidssi.etm names(aidssi.msSurv)[1:5] <- c("id","start","stop","start.stage","end.stage") # fixed names # Make state columns into character format. Censored data are specified via value "0". aidssi.msSurv$start.stage <- as.character(aidssi.msSurv$start.stage) aidssi.msSurv$end.stage <- as.character(aidssi.msSurv$end.stage) aidssi.msSurv[aidssi.msSurv$end.stage=="cens","end.stage"] <- "0" ## The computations ## Cumulative hazards and transition probabilities Prob.msSurv <- msSurv(aidssi.msSurv, ms.Diag, LT=TRUE) ## Summarizing results ## Same time points as seq(100,400,by=100) in summary.etm above, for comparison ## Estimates are the same, CI's are different because the scale is different summary(Prob.msSurv, stateocc=FALSE, dist=FALSE, time=c(1.9903052703628,3.64691649555084,5.4784,6.82859274469547), ci.fun="log") ## Transition probability from HIV to death trProb.est <- AJs(Prob.msSurv) trProb.var <- cov.AJs(Prob.msSurv) Prob.hiv.death <- data.frame(time=et(Prob.msSurv), prob=trProb.est[1,5,], var=trProb.var["HIV death","HIV death",]) row.names(Prob.hiv.death) <- 1:nrow(Prob.hiv.death) Prob.hiv.death$lower.ci <- pmax(0,1-(1-Prob.hiv.death$prob)* exp(qnorm(0.975)*sqrt(Prob.hiv.death$var)/(1-Prob.hiv.death$prob))) Prob.hiv.death$upper.ci <- 1-(1-Prob.hiv.death$prob)* exp(-qnorm(0.975)*sqrt(Prob.hiv.death$var)/(1-Prob.hiv.death$prob)) ## Transition probabilities starting at s=4 ## idx gives the indices of the event times after 4 years idx <- which(4 <= et(Prob.msSurv) ) trProb.est4 <- array(0,dim=c(5,5,length(idx))) trProb.est4[ , ,1] <- I.dA(Prob.msSurv)[ , ,idx[1]] for (i in 2:length(idx)) trProb.est4[ , ,i] <- trProb.est4[ , ,i-1] %*% I.dA(Prob.msSurv)[ , ,idx[i]] dimnames(trProb.est4) <- list(from=c("HIV","AIDS","SI","AIDS/SI","death"), to=c("HIV","AIDS","SI","AIDS/SI","death"), time=et(Prob.msSurv)[idx]) ## Check via Pst whether results are the same, e.g for s=4, t=10 Pst(Prob.msSurv, s=4, t=10) sum(et(Prob.msSurv)[idx]<10) # 306 trProb.est4[,,306] ## Probability to live with AIDS, combined state Prob.hiv.aids <- AJs(Prob.msSurv)[1,2,] + AJs(Prob.msSurv)[1,4,] Prob.hiv.aids <- data.frame(time=et(Prob.msSurv), prob=Prob.hiv.aids) row.names(Prob.hiv.aids) <- 1:nrow(Prob.hiv.aids) Var.hiv.aids <- cov.AJs(Prob.msSurv)["HIV AIDS","HIV AIDS",] + cov.AJs(Prob.msSurv)["HIV AIDS/SI","HIV AIDS/SI",] + 2*cov.AJs(Prob.msSurv)["HIV AIDS","HIV AIDS/SI",] Prob.hiv.aids$lower.ci <- pmax(0,1-(1-Prob.hiv.aids$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob))) Prob.hiv.aids$upper.ci <- 1-(1-Prob.hiv.aids$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob)) ## Fixed horizon transition probabilities ending at t=10 idx <- which( et(Prob.msSurv)>=2 & et(Prob.msSurv) <=10) n.time <- length(idx) FixedHor10 <- array(0,dim=c(5,5,n.time)) FixedHor10[,,n.time] <- I.dA(Prob.msSurv)[,,idx[n.time]] for (i in (n.time-1):1) FixedHor10[,,i] <- I.dA(Prob.msSurv)[,,idx[i]]%*%FixedHor10[,,i+1] dimnames(FixedHor10) <- list(from=c("HIV","AIDS","SI","AIDS/SI","death"), to=c("HIV","AIDS","SI","AIDS/SI","death"),time=c(et(Prob.msSurv)[idx-1])) ## Plotting the results ## all state occupation probabilities, directly via plot command from msSurv package plot(Prob.msSurv, plot.type="stateocc") ## FIGURE 3.4: Nelson-Aalen estimates par(las=1) plot(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[1,3,]), lwd=3, lty=2, ylim=c(0,7), xlim=c(0,13), xlab="time since HIV infection (years)", ylab="cumulative transition hazard", type="s") lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[1,2 ,]), lwd=3, type="s") lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[2,4 ,]), lwd=2, type="s") lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[3,4 ,]), lwd=2, type="s", lty=2) lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[4,5 ,]), col=c("grey50"), type="s") lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[2,5 ,]), lwd=2, col=c("grey50"), type="s") legend(1,6, c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"), lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n") ## an alternative way is via ## cumsum(dNs(Prob.msSurv)[,"dN 1 2"]/Ys(Prob.msSurv)[,"y 1"]) etc. ## the same estimates can be obtained using the survival package, for example CumHaz.2to3 <- survfit(coxph(Surv(start,stop,end.stage=="AIDS/SI")~1, data=subset(aidssi.msSurv,start.stage=="SI"))) lines(CumHaz.2to3,fun="cumhaz",col="red",mark.time=FALSE,conf.int=FALSE) ## FIGURE 3.5: transition probabilities for all direct transitions par(las=1) plot(et(Prob.msSurv), AJs(Prob.msSurv)[1,2 ,], lwd=3, xlim=c(0,13), xlab="time since HIV infection (years)", ylab="transition probability", type="s", ylim=c(0,1)) lines(et(Prob.msSurv), AJs(Prob.msSurv)[1,3 ,], lwd=3, lty=2, type="s") lines(et(Prob.msSurv), AJs(Prob.msSurv)[2,4 ,], lwd=2, type="s") lines(et(Prob.msSurv), AJs(Prob.msSurv)[3,4 ,], lwd=2, type="s", lty=2) lines(et(Prob.msSurv), AJs(Prob.msSurv)[2,5 ,], lwd=2, col=c("grey50"), type="s") lines(et(Prob.msSurv), AJs(Prob.msSurv)[4,5 ,], col=c("grey50"), type="s") legend(7,0.65,c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"), lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n") ## FIGURE 3.6 is similar, but uses trProb.est4 ## FIGURE 3.7 par(las=1) plot(et(Prob.msSurv),AJs(Prob.msSurv)[1,2,],lty=2,lwd=3,xlim=c(0,13),xlab="time since HIV infection (years)",ylab="transition probability", ylim=c(0,0.6),type="s") lines(et(Prob.msSurv),AJs(Prob.msSurv)[1,3,],lwd=3,lty=3) lines(et(Prob.msSurv),AJs(Prob.msSurv)[1,4,],lwd=3,lty=1,col="grey50") lines(et(Prob.msSurv),AJs(Prob.msSurv)[1,5,],lwd=1,lty=1,col="black") lines(et(Prob.msSurv),AJs(Prob.msSurv)[1,1,],lwd=3,lty=1,col="black") legend(0.5,0.5, c("0 HIV","1 AIDS","2 SI","3 AIDS/SI","4 death"), lwd=c(3,3,3,3,1), lty=c(1,2,3,1,1), col=c(rep("black",3),"grey50","black"), bty="n") ## FIGURE 3.8 (note: CI in FIGURE 3.8 slightly different, were based on the log scale in formula (1.23)) plot(prob~time, data=Prob.hiv.aids, type="s", xlab="time since HIV infection (years)", ylab="probability to be alive with AIDS", xlim=c(0,13), ylim=c(0,0.22), xaxs="i", lwd=3) lines(lower.ci~time, data=Prob.hiv.aids, type="s", lty=3) lines(upper.ci~time, data=Prob.hiv.aids, type="s", lty=3) lines(et(Prob.msSurv), AJs(Prob.msSurv)[1,2 ,], lwd=2, type="s") ## FIGURE 3.9: compare Kaplan-Meier and Aalen-Johansen estimator ## We create some temporary variables that allow us to make a gray confidence band tmp <- survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2) x1 <- rep(tmp$time, each=2)[-1] x2 <- rev(rep(tmp$time, each=2)[-1]) y1 <- 1-rep(tmp$lower,each=2) y1 <- y1[-length(y1)] y2 <- 1-rep(tmp$upper,each=2) y2 <- rev(y2[-length(y2)]) ## Creating the plot par(las=1,mar=c(5.1,7.1,1,1)) plot(1,1, xlim=c(0, 13), xlab="time since HIV infection (years)", ylab="transition probability to death", ylim=c(0,0.7), type="n", xaxs="i") polygon(c(x1,x2), c(y1,y2), density=-1, col=rgb(0.8,0.8,0.8, alpha=0.7), border=NA) lines(survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2), fun="event", lwd=2, mark.time=FALSE, col="grey50", conf=FALSE) lines(prob~time, data=Prob.hiv.death, lwd=2, type="s") lines(lower.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s") lines(upper.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s") rm(x1,x2,y1,y2,tmp) ## FIGURE 3.10, fixed horizon prediction plot(et(Prob.msSurv)[idx], FixedHor10[1,5,], type="s", ylim=c(0,1), xaxs="i", lwd=2, xlim=c(2,10), xlab="time since HIV infection (years)", ylab="10 years transition probability", cex.axis=0.9) lines(et(Prob.msSurv)[idx], FixedHor10[1,1,], type="s", lty=3, lwd=2) lines(et(Prob.msSurv)[idx], FixedHor10[3,5,], type="s") lines(et(Prob.msSurv)[idx], FixedHor10[3,3,], type="s", lty=2) lines(et(Prob.msSurv)[idx], FixedHor10[1,2,]+FixedHor10[1,4,], type="s", col="grey", lwd=2) lines(et(Prob.msSurv)[idx], FixedHor10[3,4,], type="s", col="grey", lwd=1) legend("top",c("death from HIV","death from SI","still in HIV state","still in SI state","AIDS from HIV","AIDS from SI"), lwd=c(2,1,2,1,2,1), lty=c(1,1,3,2,1,1), col=c(rep("black",4), rep("grey",2)), bty="n") ## FIGURE 3.11: State-AIDS entry distribution # First redefine the model Nodes <- c("HIV","AIDS","SI","AIDS/SI") Edges <- list("HIV" = list(edges=c("AIDS","SI")), "AIDS" = list(edges=NULL), "SI" = list(edges=c("AIDS/SI")), "AIDS/SI" = list(edges=NULL)) ms.Diag.AIDS <- new("graphNEL", nodes=Nodes, edgeL=Edges, edgemode="directed") # Calculation of transition probabilities aidssi.msSurv.AIDS <- subset(aidssi.msSurv, !((start.stage=="AIDS")|(start.stage=="AIDS/SI")|(end.stage=="death"))) Prob.msSurv.AIDS <- msSurv(aidssi.msSurv.AIDS[,1:5], ms.Diag.AIDS, LT=TRUE) Prob.hiv.aids2 <- AJs(Prob.msSurv.AIDS)[1,2,] + AJs(Prob.msSurv.AIDS)[1,4,] Prob.hiv.aids2 <- data.frame(time=et(Prob.msSurv.AIDS), prob=Prob.hiv.aids2) row.names(Prob.hiv.aids2) <- 1:nrow(Prob.hiv.aids2) Var.hiv.aids <- cov.AJs(Prob.msSurv.AIDS)["HIV AIDS","HIV AIDS",] + cov.AJs(Prob.msSurv.AIDS)["HIV AIDS/SI","HIV AIDS/SI",] + 2*cov.AJs(Prob.msSurv.AIDS)["HIV AIDS","HIV AIDS/SI",] Prob.hiv.aids2$lower.ci <- pmax(0,1-(1-Prob.hiv.aids2$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob))) Prob.hiv.aids2$upper.ci <- 1-(1-Prob.hiv.aids2$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob)) # Plotting plot(prob~time, data=Prob.hiv.aids2, type="s", xlab="time since HIV infection (years)", ylab="probability to develop AIDS", xlim=c(0,13), ylim=c(0,0.8), xaxs="i", lwd=3) lines(prob~time, data=Prob.hiv.aids, type="s", col="grey") # add Kaplan-Meier lines(survfit(Surv(aids.time.orig, pmax(aids.stat,siaids.stat))~1, data=aidssi2), fun="event", mark.time=FALSE, col="red") ##################### ## The mstate package ##################### library(mstate) ## Specify allowed transitions trans.mst <- transMat(x = list(c(2,3), c(4,5), 4, 5, c()), names = c("HIV", "AIDS", "SI", "AIDS/SI", "death")) trans.mst ## create data set aidssi.mst <- msprep(aidssi2, trans=trans.mst, time=c(NA,"aids.time","si.time","siaids.time","death.time"), status=c(NA,"aids.stat","si.stat","siaids.stat","death.stat"), start=list(time=aidssi2$entry.time,state=rep(1,nrow(aidssi2)))) ## table with observed transitions events(aidssi.mst) ## The computations NA.surv <- coxph(Surv(Tstart,Tstop,status)~strata(trans), data=aidssi.mst) # step 1 NA.mst <- msfit(NA.surv, vartype="greenwood", trans=trans.mst) # step 2 Prob.mst <- probtrans(NA.mst, predt=0, method="greenwood") # step 3 ## Transition probabilities starting at s=4 Prob.mst4 <- probtrans(NA.mst, predt=4, method="greenwood") ## Fixed horizon transition probabilities ending at t=10 Prob.mst.R <- probtrans(NA.mst, predt=10, direction="fixedhorizon") ## Summarizing results summary(NA.mst) # Nelson-Aalen summary(Prob.mst) # Aalen-Johansen summary(Prob.mst, from=1) # from state 1 only ## More flexibility with output Prob.mst from probtrans ## Transition probability from HIV, state 1, to death, state 5 Prob.hiv.death <- data.frame(time=Prob.mst[[1]]$time, prob=Prob.mst[[1]]$pstate5, se=Prob.mst[[1]]$se5) Prob.hiv.death$upper.ci <- 1-(1-Prob.hiv.death$prob)* exp(-qnorm(0.975)*Prob.hiv.death$se/(1-Prob.hiv.death$prob)) Prob.hiv.death$lower.ci <- pmax(0,1-(1-Prob.hiv.death$prob)* exp(qnorm(0.975)*Prob.hiv.death$se/(1-Prob.hiv.death$prob))) ## Probability to live with AIDS, combined state ## We need to redo the computations in order to store the covariance information as well Prob.mst <- probtrans(NA.mst, predt=0, method="greenwood", covariance=TRUE) Prob.hiv.aids <- data.frame(time=Prob.mst[[1]]$time, prob=Prob.mst[[1]]$pstate2+Prob.mst[[1]]$pstate4) ## Covariance information is stored in a separate component, which is a three-dimensional array. ## First two components define "from" and "to states; last component has the values dimnames(Prob.mst[[6]][,,1]) # check how the elements are named Var.hiv.aids <- Prob.mst[[1]]$se2^2 + Prob.mst[[1]]$se4^2 + 2*Prob.mst[[6]]["from1to2","from1to4",] Prob.hiv.aids$lower.ci <- pmax(0,1-(1-Prob.hiv.aids$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob))) Prob.hiv.aids$upper.ci <- 1-(1-Prob.hiv.aids$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob)) ## Plotting the results plot(Prob.mst, type="single", lwd=c(3,3,2,2,2,1), lty=c(1,2,2,1,1,1), col=c(rep("black",4), rep("grey50",2))) # all transition probabilities from state 1, HIV ## another display format plot(Prob.mst, type="filled", ord=1:5) ## FIGURE 3.4: Nelson-Aalen estimates ## With stacked format is easy via standard survival code par(las=1) plot(survfit(NA.surv), fun="cumhaz", mark.time=FALSE, lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1),col=c(rep("black",3),"grey50","black","grey50"),ylim=c(0,7),xlim=c(0,13)) legend(1,6, c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"), lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n") ## Alternative: via plot function for msfit object (has a bug with respect to lwd and lty) plot(NA.mst, lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",3),"grey50","black","grey50"), ylim=c(0,7), xlim=c(0,13), legend=c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"), bty="n", legend.pos=c(1,6)) ## FIGURE 3.5: transition probabilities for all direct transitions par(las=1) plot(pstate2~time, data=Prob.mst[[1]], lwd=3, type="s", xlim=c(0,13), xlab="time since HIV infection (years)", ylim=c(0,1)) lines(pstate3~time, data=Prob.mst[[1]], lwd=3, lty=2, type="s") lines(pstate4~time, data=Prob.mst[[2]], lwd=2, type="s") lines(pstate4~time, data=Prob.mst[[3]], lwd=2, lty=2, type="s") lines(pstate5~time, data=Prob.mst[[4]], type="s", col="grey50") lines(pstate5~time, data=Prob.mst[[2]], lwd=2, type="s", col="grey50") legend(7,0.65,c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"), lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n") ## FIGURE 3.6 is similar, but uses Prob.mst4 ## FIGURE 3.7 plot(Prob.mst,from=1, type="single", xlab="time since HIV infection (years)", ylab="transition probability", xlim=c(0,13), ylim=c(0,0.6), lwd=c(3,3,3,3,1), lty=c(1,2,3,1,1), col=c(rep("black",3),"grey50","black","grey50"), legend=c("0 HIV","1 AIDS","2 SI","3 AIDS/SI","4 death"), legend.pos="left") ## FIGURE 3.8 (note: CI in FIGURE 3.8 slightly different, were based on the log scale in formula (1.23)) plot(prob~time, data=Prob.hiv.aids, type="s", xlab="time since HIV infection (years)", ylab="probability to be alive with AIDS", xlim=c(0,13), ylim=c(0,0.22), xaxs="i", lwd=3) lines(lower.ci~time, data=Prob.hiv.aids, type="s", lty=3) lines(upper.ci~time, data=Prob.hiv.aids, type="s", lty=3) lines(pstate2~time, data=Prob.mst[[1]], lwd=2, type="s") ## FIGURE 3.9: compare Kaplan-Meier and Aalen-Johansen estimator ## We create some temporary variables that allow us to make a gray confidence band tmp <- survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2) x1 <- rep(tmp$time, each=2)[-1] x2 <- rev(rep(tmp$time, each=2)[-1]) y1 <- 1-rep(tmp$lower,each=2) y1 <- y1[-length(y1)] y2 <- 1-rep(tmp$upper,each=2) y2 <- rev(y2[-length(y2)]) ## Creating the plot par(las=1,mar=c(5.1,7.1,1,1)) plot(1,1, xlim=c(0, 13), xlab="time since HIV infection (years)", ylab="transition probability to death", ylim=c(0,0.7), type="n", xaxs="i") polygon(c(x1,x2), c(y1,y2), density=-1, col=rgb(0.8,0.8,0.8, alpha=0.7), border=NA) lines(survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2), fun="event", lwd=2, mark.time=FALSE, col="grey50", conf=FALSE) lines(prob~time, data=Prob.hiv.death, lwd=2, type="s") lines(lower.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s") lines(upper.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s") rm(x1,x2,y1,y2,tmp) ## FIGURE 3.10, fixed horizon prediction plot(pstate5~time, data=Prob.mst.R[[1]], xaxs="i", type="s", ylim=c(0,1), lwd=2, xlim=c(2,10), xlab="time since HIV infection (years)", ylab="10 years transition probability") lines(pstate1~time, data=Prob.mst.R[[1]], type="s", lty=3, lwd=2) lines(pstate5~time, data=Prob.mst.R[[3]], type="s") lines(pstate3~time, data=Prob.mst.R[[3]], type="s", lty=2) lines(I(pstate2+pstate4)~time, data=Prob.mst.R[[1]], type="s", col="grey", lwd=2) lines(pstate4~time, data=Prob.mst.R[[3]], type="s", col="grey") legend("top",c("death from HIV","death from SI","still in HIV state","still in SI state","AIDS from HIV","AIDS from SI"), lwd=c(2,1,2,1,2,1), lty=c(1,1,3,2,1,1), col=c(rep("black",4), rep("grey",2)), bty="n") ## FIGURE 3.11: State-AIDS entry distribution # First redefine the model trans.mst.AIDS <- transMat(x = list(c(2,3), c(), 4, c()), names = c("HIV", "AIDS", "SI", "AIDS/SI")) trans.mst.AIDS # Calculation of transition probabilities aidssi.mst.AIDS <- subset(aidssi.mst, !((trans==3)|(trans==4)|(trans==6))) NA.surv <- coxph(Surv(Tstart,Tstop,status)~strata(trans), data=aidssi.mst.AIDS) NA.mst <- msfit(NA.surv, vartype="greenwood", trans=trans.mst.AIDS) Prob.mst.AIDS <- probtrans(NA.mst, predt=0, method="greenwood", covariance=TRUE) Prob.hiv.aids2 <- data.frame(time=Prob.mst.AIDS[[1]]$time, prob=Prob.mst.AIDS[[1]]$pstate2+Prob.mst.AIDS[[1]]$pstate4) Var.hiv.aids <- Prob.mst.AIDS[[1]]$se2^2 + Prob.mst.AIDS[[1]]$se4^2 + 2*Prob.mst.AIDS[[5]]["from1to2","from1to4",] Prob.hiv.aids2$lower.ci <- pmax(0,1-(1-Prob.hiv.aids2$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob))) Prob.hiv.aids2$upper.ci <- 1-(1-Prob.hiv.aids2$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob)) # Plotting plot(prob~time, data=Prob.hiv.aids2, type="s", xlab="time since HIV infection (years)", ylab="probability to develop AIDS", xlim=c(0,13), ylim=c(0,0.8), xaxs="i", lwd=3) lines(prob~time, data=Prob.hiv.aids, type="s", col="grey") # add Kaplan-Meier lines(survfit(Surv(aids.time.orig, pmax(aids.stat,siaids.stat))~1, data=aidssi2), fun="event", mark.time=FALSE, col="red") ########################################################################## ## Chapter 4 ########################################################################## ############################## ## Section 4.2 ############################## ## Separate analyses per event type coxph(Surv(time, status==1) ~ ccr5, data = aidssi) coxph(Surv(time, status==2) ~ ccr5, data = aidssi) ############################### ## Section 4.3 ############################### ## 1. Same results in one analysis ################################## ## Creation of stacked data set (via the crprep function); creation of compound ## and type-specific covariables (without using expand.covs) aidssi.w <- crprep("time", "cause", data=aidssi, trans=c("AIDS","SI"), cens="event-free", id="patnr", keep="ccr5") aidssi.w <- within(aidssi.w,{ ccrSI <- ifelse(failcode=="SI"&ccr5=="WM",1,ifelse(is.na(ccr5),NA,0)) ccrAIDS <- ifelse(failcode=="AIDS"&ccr5=="WM",1,ifelse(is.na(ccr5),NA,0)) ccr.comb <- factor(ccrAIDS+2*ccrSI,labels=c("WW","WM.AIDS","WM.SI")) }) ## Select only rows that correspond to actual follow-up aidssi.stack <- subset(aidssi.w, count==1) ## Some further code to make data correspond with the book; not necessary to perform analyses aidssi.stack$Tstart <- aidssi.stack$weight.cens <- aidssi.stack$count <- NULL aidssi.stack <- aidssi.stack[order(aidssi.stack$patnr),] aidssi.stack$status <- ifelse(aidssi.stack$status==aidssi.stack$failcode,1,0) names(aidssi.stack)[2] <- "time" subset(aidssi.stack, patnr %in% c(14,3,15,8))[c(5,6,1,2,7,8,3,4),c(1:3,5,4,6:8)] # TABLE 4.3 ## Combined analysis with interaction term coxph(Surv(time, status) ~ ccr5 * strata(failcode), data = aidssi.stack) ## An alternative, but not recommended coxph(Surv(time, status) ~ ccr5 * failcode + strata(failcode), data = aidssi.stack) ## Combined analysis with ccr.comb variable fit.stacked <- coxph(Surv(time, status) ~ ccr.comb + strata(failcode), data = aidssi.stack) fit.stacked ## A more detailed description is obtained using the summary function on the fit summary(fit.stacked) ## Robust standard error coxph(Surv(time, status) ~ ccr.comb + strata(failcode) + cluster(patnr), data = aidssi.stack) ## 2. Type-specific covariables ############################### coxph(Surv(time, status) ~ ccrAIDS + ccrSI + strata(failcode), data = aidssi.stack) ## Different contrast, such that one term quantifies the additional effect coxph(Surv(time, status) ~ ccr5 + ccrSI + strata(failcode), data = aidssi.stack) coxph(Surv(time, status) ~ ccr5 + ccrAIDS + strata(failcode), data = aidssi.stack) ## 3. Effects equal over causes ############################### coxph(Surv(time, status) ~ ccr5 + strata(failcode), data = aidssi.stack) ## Same result via coxph(Surv(time, status!=0) ~ ccr5, data = aidssi) ## Robust standard error coxph(Surv(time, status) ~ ccr5 + strata(failcode) + cluster(patnr), data = aidssi.stack) ## 4. Proportional baseline hazards ################################### coxph(Surv(time, status) ~ ccrAIDS + ccrSI + failcode, data = aidssi.stack) ## Test for non-proportionality cox.zph(coxph(Surv(time, status) ~ ccrAIDS + ccrSI + failcode, data = aidssi.stack)) ## Proportional baseline hazards and effect ccr5 equal coxph(Surv(time, status) ~ ccr5 + failcode, data = aidssi.stack) ## Section 4.4.3: effect failcode is also obtained from logistic regression summary(glm(cause=="SI"~1, data=aidssi, subset=status>0&!is.na(ccr5), family=binomial)) ## or directly from the data event.table <- table(subset(aidssi, !is.na(ccr5))$cause) qlogis(event.table["SI"]/(event.table["AIDS"]+event.table["SI"])) ############################## ## Section 4.5 ############################## ## We first repeat the code that was shown above (as in section 3.7.3) ## Only run this if you need to recreate the aidssi2 data set data(aidssi2) aidssi2 <- aidssi2[,c(1,2,3,4,5,6,5,6,7:10)] names(aidssi2)[c(7,8)] <- c("siaids.time","siaids.stat") tmp.si.aids <- aidssi2$si.time < aidssi2$aids.time aidssi2 <- within(aidssi2, { siaids.stat <- aids.stat*si.stat siaids.time <- pmax(aids.time,si.time) aids.stat[tmp.si.aids] <- 0 si.stat[!tmp.si.aids] <- 0 }) aidssi2$aids.time.orig <- aidssi2$aids.time aidssi2$si.time.orig <- aidssi2$si.time aidssi2$aids.time[aidssi2$aids.stat==0] <- aidssi2$death.time[aidssi2$aids.stat==0] aidssi2$si.time[aidssi2$si.stat==0] <- aidssi2$death.time[aidssi2$si.stat==0] rm(tmp.si.aids) trans.mst <- transMat(x = list(c(2,3), c(4,5), 4, 5, c()), names = c("HIV", "AIDS", "SI", "AIDS/SI", "death")) ## Create data set in stacked format, we include three (co)variables via the keep argument aidssi.mst <- msprep(aidssi2, trans=trans.mst, time=c(NA,"aids.time","si.time","siaids.time","death.time"), status=c(NA,"aids.stat","si.stat","siaids.stat","death.stat"), start=list(time=aidssi2$entry.time,state=rep(1,nrow(aidssi2))), id="patnr", keep=c("ccr5","age.inf","aids.time")) # create type-specific variables aidssi.mst <- expand.covs(aidssi.mst, covs=c("ccr5","age.inf"), longnames=FALSE) ## separate analysis by end point for(i in 1:6) print(coxph(Surv(Tstart,Tstop,status)~ccr5+I(age.inf/10),data=aidssi.mst, subset=trans==i)) ## 1. Combined analyses: assume effects to be equal ################################################### ## Combine ccr5 effect for transitions 2 and 3 and remove ccr5.2 from data aidssi.mst$ccr5.23 <- aidssi.mst$ccr5.2+aidssi.mst$ccr5.3 aidssi.mst$ccr5.2 <- NULL ## Likelihood ratio test whether age effect can be assumed equal for all transitions ## Fit model with all transition-specific covariables fit.aidssi.all <- coxph(Surv(Tstart,Tstop,status) ~ strata(trans) + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 + I(age.inf.1/10)+ I(age.inf.2/10)+ I(age.inf.3/10)+ I(age.inf.4/10)+ I(age.inf.5/10) + I(age.inf.6/10), data=aidssi.mst) ## and model with equal age effects fit.aidssi.mst <- coxph(Surv(Tstart,Tstop,status) ~ strata(trans) + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 + I(age.inf/10), data=aidssi.mst) ## Likelihood ratio test for from significant anova(fit.aidssi.all,fit.aidssi.mst) ## Since we will use one overall age variable, we remove the type-specific variables aidssi.mst$age.inf.1 <- aidssi.mst$age.inf.2 <- aidssi.mst$age.inf.3 <- aidssi.mst$age.inf.4 <- aidssi.mst$age.inf.5 <- aidssi.mst$age.inf.6 <- NULL ## Summary data set summary(fit.aidssi.mst) ## Test effect on pathway HIV -> SI -> AIDS fit.tmp <- coxph(Surv(Tstart,Tstop,status) ~ strata(trans) + ccr5.1 + ccr5.4 + ccr5.6 + I(age.inf/10), data=aidssi.mst) anova(fit.tmp,fit.aidssi.mst) ## 2. Proportional baseline hazards ################################### ## FIGURE 4.2: estimated cumulative baseline hazards par(las=1) plot(survfit(fit.aidssi.mst), fun="cumhaz", mark.time=FALSE, xlim=c(0,12), ylim=c(0,2), lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",3),"grey50","black","grey50")) legend(8,1.8, c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"), lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n" ) ## Fit the model and test for proportionality aidssi.mst$trans <- factor(aidssi.mst$trans) fit.aidssi.mst2 <- coxph(Surv(Tstart,Tstop,status) ~ relevel(trans,2) + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 + I(age.inf/10), data=aidssi.mst) cox.zph(fit.aidssi.mst2) ## Use transition 1. HIV -> AIDS as reference transition cox.zph(coxph(Surv(Tstart,Tstop,status) ~ trans + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 + I(age.inf/10), data=aidssi.mst)) ## Create variable tr.prop that defines the transitions that are assumed proportional aidssi.mst$tr.AIDS <- ifelse(aidssi.mst$trans==1,1,2) aidssi.mst$tr.prop <- factor(pmax(2,as.numeric(aidssi.mst$trans)),labels=c("2","3","4","5","6")) ## Fit the model fit.aidssi.mst3 <- coxph(Surv(Tstart,Tstop,status) ~ strata(tr.AIDS) + tr.prop + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 + I(age.inf/10), data=aidssi.mst) fit.aidssi.mst3 ## 3. Dual role of intermediate states ###################################### coxph(Surv(Tstart,Tstop,status) ~ trans, data=aidssi.mst, subset=trans %in% c(1,5)) ## 4. Beyond the Markov model: effect of transition time ######################### aidssi.mst$aids.time <- ifelse(aidssi.mst$trans %in% c(4,6),aidssi.mst$Tstart,0) coxph(Surv(Tstart,Tstop,status) ~ strata(tr.AIDS) + tr.prop + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 + I(age.inf/10) + aids.time, data=aidssi.mst) ## 5. Standard error #################### coxph(Surv(Tstart,Tstop,status) ~ strata(trans) + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 + I(age.inf/10)+cluster(patnr), data=aidssi.mst) ##################################################################################### ## Chapter 5, Section 5.7.1 ##################################################################################### ####################################################### ## From cause-specific/transition hazard to probability ####################################################### ## Competing risks ################## ## Basis: fitted proportional cause-specific hazards model fit.stacked <- coxph(Surv(time, status) ~ ccr.comb + strata(failcode), data = aidssi.stack) ## Create data frame with covariable value of interest and one row per event type indiv.WW <- data.frame(strata=c("AIDS","SI"), ccr.comb="WW") indiv.WM <- data.frame(strata=c("AIDS","SI"), ccr.comb=c("WM.AIDS","WM.SI")) indiv.WW indiv.WM ## or we can write ## indiv.WW <- subset(aidssi.stack, patnr==1) ## indiv.WW$strata <- factor(indiv.WW$failcode) ## indiv.WM <- subset(aidssi.stack, patnr==2) ## indiv.WM$strata <- factor(indiv.WM$failcode) ## Compute estimates of cause-specific cumulative incidence ## Steps two and three in mstate approach (see also Pages 139/140 of the book) trans.SI <- trans.comprisk(2, c("event-free","AIDS","SI")) Haz.WW <- msfit(fit.stacked, newdata=indiv.WW, trans=trans.SI) Haz.WM <- msfit(fit.stacked, newdata=indiv.WM, trans=trans.SI) AJprop.WW <- probtrans(Haz.WW, predt=0) AJprop.WM <- probtrans(Haz.WM, predt=0) ## FIGURE 5.1 par(las=1) plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"), weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2), xlim=c(0,13), ylim=c(0,1), xlab="time since HIV infection (years)", xaxs="i", fun="event") lines(pstate3~time,data=AJprop.WW[[1]], type="s", lwd=2) lines(pstate3~time,data=AJprop.WM[[1]], type="s", lwd=2, lty=2) legend(1,0.55, c("WW","WM"), lwd=c(2,2), lty=c(1,2), bty="n", y.intersp=1.4) lines(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="AIDS"), weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2), xlim=c(0,13), ylim=c(0,0.48), xlab="", xaxs="i") lines(1-pstate2~time,data=AJprop.WW[[1]], type="s", lwd=2) lines(1-pstate2~time,data=AJprop.WM[[1]], type="s", lwd=2, lty=2) legend(1,0.55, c("WW","WM"), lty=c(1,2), bty="n", y.intersp=1.4) text(11,0.7, "AIDS") text(11,0.1, "SI") ## Multi-state model #################### ## We first repeat the creation of the data set aidssi.mst <- msprep(aidssi2, trans=trans.mst, time=c(NA,"aids.time","si.time","siaids.time","death.time"), status=c(NA,"aids.stat","si.stat","siaids.stat","death.stat"), start=list(time=aidssi2$entry.time,state=rep(1,nrow(aidssi2))), keep=c("ccr5","age.inf","aids.time")) aidssi.mst <- expand.covs(aidssi.mst, covs=c("ccr5","age.inf"), longnames=FALSE) aidssi.mst$ccr5.23 <- aidssi.mst$ccr5.2+aidssi.mst$ccr5.3 aidssi.mst$ccr5.2 <- NULL aidssi.mst$age.inf.1 <- aidssi.mst$age.inf.2 <- aidssi.mst$age.inf.3 <- aidssi.mst$age.inf.4 <- aidssi.mst$age.inf.5 <- aidssi.mst$age.inf.6 <- NULL aidssi.mst$tr.AIDS <- ifelse(aidssi.mst$trans==1,1,2) aidssi.mst$tr.prop <- factor(pmax(2,as.numeric(aidssi.mst$trans)),labels=c("2","3","4","5","6")) ## The model on which we want to base our predictions fit.aidssi.mst3 <- coxph(Surv(Tstart,Tstop,status) ~ strata(tr.AIDS) + tr.prop + I(age.inf/10) + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6, data=aidssi.mst) ## Estimate for individual with covariable combination age=25 and WW genotype patWW25 <- data.frame(age.inf=rep(25,6), strata=c(1,2,2,2,2,2), tr.prop=factor(c(2,2,3,4,5,6)), ccr5.23=rep(0,6), ccr5.1=rep(0,6), ccr5.4=rep(0,6), ccr5.5=rep(0,6), ccr5.6=rep(0,6)) patWW25 trHaz.WW25 <- msfit(fit.aidssi.mst3, newdata=patWW25, trans=trans.mst) prob.WW25 <- probtrans(trHaz.WW25, predt=2) ## Estimate for individual with covariable combination age=25 and WM genotype patWM25 <- data.frame(age.inf=rep(25,6), strata=c(1,2,2,2,2,2), tr.prop=factor(c(2,2,3,4,5,6)), ccr5.23=c(0,1,1,0,0,0), ccr5.1=c(1,rep(0,5)), ccr5.4=c(0,0,0,1,0,0), ccr5.5=c(rep(0,4),1,0), ccr5.6=c(rep(0,5),1)) patWM25 trHaz.WM25 <- msfit(fit.aidssi.mst3, newdata=patWM25, trans=trans.mst) prob.WM25 <- probtrans(trHaz.WM25, predt=2) ## Estimate for individual with covariable combination age=50 and WW genotype patWW50 <- data.frame(age.inf=rep(50,6), strata=c(1,2,2,2,2,2), tr.prop=factor(c(2,2,3,4,5,6)), ccr5.23=rep(0,6), ccr5.1=rep(0,6), ccr5.4=rep(0,6), ccr5.5=rep(0,6), ccr5.6=rep(0,6)) trHaz.WW50 <- msfit(fit.aidssi.mst3, newdata=patWW50, trans=trans.mst) prob.WW50 <- probtrans(trHaz.WW50, predt=2) ## Estimate for individual with covariable combination age=50 and WM genotype patWM50 <- data.frame(age.inf=rep(50,6), strata=c(1,2,2,2,2,2), tr.prop=factor(c(2,2,3,4,5,6)), ccr5.23=c(0,1,1,0,0,0), ccr5.1=c(1,rep(0,5)), ccr5.4=c(0,0,0,1,0,0), ccr5.5=c(rep(0,4),1,0), ccr5.6=c(rep(0,5),1)) trHaz.WM50 <- msfit(fit.aidssi.mst3, newdata=patWM50, trans=trans.mst) prob.WM50 <- probtrans(trHaz.WM50, predt=2) ## A warning message appears in all estimates: ## In probtrans(trHaz.WW25, predt = 2) : ## Warning! Negative diagonal elements of (I+dA); the estimate may not be meaningful. ## This occurs if an increment in the Nelson-Aalen estimate is larger than 1. ## It typically occurs if the remaining number at risk is small and can usually be neglected ## FIGURE 5.3 plot(pstate5~time, data=prob.WW25[[1]], type="s", lwd=2, xlim=c(2,13), ylim=c(0,1), xlab="time since HIV infection (years)", ylab="death probability") lines(pstate5~time, data=prob.WW25[[3]], type="s") lines(pstate5~time, data=prob.WM25[[1]], type="s", lwd=2, lty=2) lines(pstate5~time, data=prob.WM25[[3]], type="s", lty=2) # Create bars for confidence intervals tmp <- match(1, prob.WW25[[1]]$time>6) with(prob.WW25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2)) tmp <- match(1, prob.WW25[[1]]$time>10) with(prob.WW25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2)) tmp <- match(1, prob.WW25[[3]]$time>4) with(prob.WW25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp])) tmp <- match(1, prob.WW25[[3]]$time>8) with(prob.WW25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp])) tmp <- match(1, prob.WM25[[1]]$time>6)+10 with(prob.WM25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2, lty=2)) tmp <- match(1, prob.WM25[[1]]$time>10)+7 with(prob.WM25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2, lty=2)) tmp <- match(1, prob.WM25[[3]]$time>4)+5 with(prob.WM25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lty=2)) tmp <- match(1, prob.WM25[[3]]$time>8)+5 with(prob.WM25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lty=2)) ## FIGURE 5.4 plot(pstate5~time, data=prob.WW25[[1]], type="s", lwd=2, xlim=c(2,13), ylim=c(0,1), xlab="time since HIV infection (years)", ylab="death probability") lines(pstate5~time, data=prob.WW25[[3]], type="s") lines(pstate5~time, data=prob.WW50[[1]], type="s", lwd=2, lty=2) lines(pstate5~time, data=prob.WW50[[3]], type="s", lty=2) tmp <- match(1, prob.WW25[[1]]$time>6) with(prob.WW25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2)) tmp <- match(1, prob.WW25[[1]]$time>10) with(prob.WW25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2)) tmp <- match(1, prob.WW25[[3]]$time>4) with(prob.WW25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp])) tmp <- match(1, prob.WW25[[3]]$time>8) with(prob.WW25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp])) tmp <- match(1, prob.WW50[[1]]$time>6)+10 with(prob.WW50[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2, lty=2)) tmp <- match(1, prob.WW50[[1]]$time>10)+7 with(prob.WW50[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2, lty=2)) tmp <- match(1, prob.WW50[[3]]$time>4)+5 with(prob.WW50[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lty=2)) tmp <- match(1, prob.WW50[[3]]$time>8)+5 with(prob.WW50[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp], (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lty=2)) ####################################### ## Regression on subdistribution hazard ####################################### ## Fit model based on one overall weight function fit.FG <- coxph(Surv(Tstart,Tstop,status==failcode) ~ ccr.comb + strata(failcode), data=aidssi.w, weights=weight.cens) fit.FG ## Using separate weight function per value of CCR5, differences are small aidssi.wCCR <- within(aidssi.wCCR,{ ccrAIDS <- ifelse(failcode=="AIDS"&ccr5=="WM", 1, ifelse(is.na(ccr5),NA,0) ) ccrSI <- ifelse(failcode=="SI"&ccr5=="WM", 1, ifelse(is.na(ccr5),NA,0) ) ccr.comb <- factor(ccrAIDS+2*ccrSI, labels=c("WW","WM.AIDS","WM.SI")) }) coxph(Surv(Tstart,Tstop,status==failcode)~ccr.comb+strata(failcode), data=aidssi.wCCR, weights=weight.cens) ## Create covariable combinations and compute predictions, based on fit.FG indivs <- data.frame(ccr.comb=factor(c("WW","WW","WM.AIDS","WM.SI")), failcode=c("AIDS","SI","AIDS","SI")) pred.sdh <- survfit(fit.FG, newdata=indivs) ## FIGURE 5.5 plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"), weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2), xlim=c(0,13), ylim=c(0,1), xlab="time since HIV infection (years)", xaxs="i", fun="event") lines(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="AIDS"), weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2)) legend(1,0.55,c("WW","WM"),lwd=c(2,2),lty=c(1,2),bty="n",y.intersp=1.4) lines(pred.sdh[c(2,4)], mark.time=FALSE, fun="event", lty=1:2, lwd=2) lines(pred.sdh[c(1,3)], mark.time=FALSE, lty=1:2, lwd=2) text(11,0.7,"AIDS") text(11,0.1,"SI") ## Test for non-proportionality test.PH <- cox.zph(fit.FG, transform="identity") test.PH par(mfrow=c(1,2)) plot(test.PH) par(mfrow=c(1,1)) ## Fit proportional subdistribution hazards via cmprsk package, uses sandwich estimator of standard ## error library(cmprsk) with(aidssi, crr(time, status, as.numeric(ccr5)-1, failcode=1)) with(aidssi, crr(time, status, as.numeric(ccr5)-1, failcode=2)) ## Effect estimate the same, standard error slightly different ## Test for proportionality, using sandwich-based estimator # install.packages("crrSC") library(crrSC) with(aidssi, psh.test(time,status,as.numeric(ccr5)-1,D=1, tf=function(x) x)) ## Since ccrSC requires the event of interest to have value 1, we create a temporary data set with ## SI event 1 tmp <- aidssi tmp$status <- ifelse(tmp$status==2,1,tmp$status*2) with(tmp, psh.test(time,status,as.numeric(ccr5)-1,D=1, tf=function(x) x)) ############################## ## Proportional odds model ############################## # install.packages("timereg") library(timereg) ## SI as event of interest, one overall weight function fit.Podds <- comp.risk(Event(time,status)~const(ccr5), data=subset(aidssi,!is.na(ccr5)), cause=2, times=subset(aidssi,!is.na(ccr5)&status==2)$time, model="logistic" ) coef(fit.Podds) ## SI as event of interest, CCR5-specific weight function based on Cox model fit.Podds.w <- comp.risk(Event(time,status)~const(ccr5), data=subset(aidssi,!is.na(ccr5)), cause=2, times=subset(aidssi,!is.na(ccr5)&status==2)$time, model="logistic", cens.model="cox" ) coef(fit.Podds.w) ## AIDS as event of interest fit.Podds.AIDS <- comp.risk(Event(time,status)~const(ccr5), data=subset(aidssi,!is.na(ccr5)), cause=1, times=subset(aidssi,!is.na(ccr5)&status==1)$time, model="logistic" ) newdata <- data.frame(ccr5=c(0,1)) pred.SI <- predict(fit.Podds, newdata=newdata) pred.AIDS <- predict(fit.Podds.AIDS, newdata=newdata) ## FIGURE 5.7 plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"), weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2), xlim=c(0,13), ylim=c(0,1), xlab="time since HIV infection (years)", xaxs="i", fun="event") lines(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="AIDS"), weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2)) legend(1,0.55,c("WW","WM"),lwd=c(2,2),lty=c(1,2),bty="n",y.intersp=1.4) lines(pred.SI$time,pred.SI$P1[1,],type="s",lwd=2) lines(pred.SI$time,pred.SI$P1[2,],type="s",lty=2,lwd=2) lines(pred.AIDS$time,1-pred.AIDS$P1[1,],type="s",lwd=2) lines(pred.AIDS$time,1-pred.AIDS$P1[2,],type="s",lty=2,lwd=2) text(11,0.7,"AIDS") text(11,0.1,"SI")