##########################################################################
## Chapter 1, section 1.11
##########################################################################

#######################
### The aidssi data set
#######################
library(mstate) # also loads the survival package
data(aidssi)

aidssi[c(14,3,15,8),] # four example individuals

######################################
### Define time and status information
######################################
Surv(time=time, event=(status!=0))
Surv(time=time, event=(cause!="event-free"))

########################
### Perform calculations
########################
KM.curve <- survfit(Surv(time, status!=0)~1, data=aidssi) # Kaplan-Meier
fit.Cox <- coxph(Surv(time, status!=0)~ccr5, data=aidssi) # Cox model

######################
### Summary of outcome
######################
KM.curve                          # concise summary of Kaplan-Meier estimate
summary(KM.curve)                 # more detailed summary
summary(KM.curve, times=c(5,10))  # detailed summary at specific time points
## confidence intervals on log-log scale
summary(survfit(Surv(time, status!=0)~1, data=aidssi, conf.type="log-log"), times=c(5,10))

## plot Kaplan-Meier
par(las=1)
## as survival function
plot(KM.curve, mark.time=FALSE, conf.int=FALSE,
     xlab="time since HIV infection (years)", ylab="overall cumulative incidence")
## FIGURE 1.22, as cumulative incidence
plot(KM.curve, fun="event", mark.time=FALSE, conf.int=FALSE,
     xlab="time since HIV infection (years)", ylab="overall cumulative incidence")
## estimate of cumulative hazard
plot(KM.curve, fun="cumhaz", mark.time=FALSE, conf.int=FALSE,
     xlab="time since HIV infection (years)", ylab="overall cumulative incidence")

fit.Cox          # concise summary of Cox model
summary(fit.Cox) # more detailed summary
cox.zph(fit.Cox) # test for proportionality based on scaled Schoenfeld residuals

## prediction based on Cox model
summary(survfit(fit.Cox), times=c(5,10))                       # "average individual"
new.indiv <- data.frame(ccr5=c("WW","WM"))                     # create specific values
summary(survfit(fit.Cox, newdata=new.indiv)[1], times=c(5,10)) # first individual in new.indiv (WW)
summary(survfit(fit.Cox, newdata=new.indiv)[2], times=c(5,10)) # second individual (WM)
plot(survfit(fit.Cox, newdata=new.indiv))                      # predicted survival curves

#################
### Log-rank test
#################
survdiff(Surv(time,status!=0)~ccr5, data=aidssi)
KM.ccr5 <- survfit(Surv(time,status!=0)~ccr5, data=aidssi) # Kaplan-Meier per CCR5 value
summary(KM.ccr5[1])                                        # summary for reference value (WW)
plot(KM.ccr5)                                              # plot for both values
plot(KM.ccr5[1])                                           # plot for WW group

rm(KM.curve, fit.Cox, KM.ccr5)

###########################################################################
## chapter 2, section 2.9
###########################################################################

##########################
## The Aalen-Johansen form
##########################

### The survival package
########################

## compute estimates
csiSurv <- survfit(Surv(time=time,event=status,type="mstate")~1, data=aidssi)
## alternative event specification, via factor variable "cause"
survfit(Surv(time=time,event=relevel(cause,"event-free"))~1, data=aidssi)
## estimates at specific time points
summary(csiSurv, times=seq(2,10,by=2))

## confidence intervals
csiSurv$lower
csiSurv$upper

## FIGURE 2.4
par(las=1)
plot(survfit(Surv(time,status==2)~1,data=aidssi), xlim=c(0,13), mark.time=FALSE, col="grey50",
     conf.int=FALSE, xlab="time since HIV infection",xaxs="i")
lines(survfit(Surv(time,status==1)~1,data=aidssi), fun="event", mark.time=FALSE, col="grey50",
      conf.int=FALSE)
lines(csiSurv[2], fun="identity", mark.time=FALSE, lwd=2, conf.int=FALSE)
lines(csiSurv[1], mark.time=FALSE, lwd=2, conf.int=FALSE)
text(8.9, 0.18, "time to AIDS")
text(8.9, 0.75, "time to SI")

## FIGURE 2.5
plot(survfit(Surv(time,status>0)~1, data=aidssi), xlim=c(0,13), mark.time=FALSE, conf.int=FALSE,
     xlab="time since HIV infection", fun="event", lwd=2, xaxs="i")
lines(csiSurv[1], fun="event", mark.time=FALSE, lwd=2, conf.int=FALSE)
text(8.9, 0.15, "time to AIDS")
text(8.9, 0.45, "time to SI")

## create data frame with confidence intervals on the two log scales and complementary log-log scales, as well
## as on the plain scale
csiSurvCI <- data.frame(est=csiSurv$prev[,2])
csiSurvCI$time <- csiSurv$time
csiSurvCI$std.err <- csiSurv$std.err[,2]
csiSurvCI$lower.log <- pmax(0,1-(1-csiSurvCI$est)*
                 exp(qnorm(0.975)*csiSurvCI$std.err/(1-csiSurvCI$est)))
csiSurvCI$upper.log <- 1-(1-csiSurvCI$est)*
                exp(-qnorm(0.975)*csiSurvCI$std.err/(1-csiSurvCI$est))
csiSurvCI$lower.log2 <- csiSurvCI$est*
                 exp(-qnorm(0.975)*csiSurvCI$std.err/csiSurvCI$est)
csiSurvCI$upper.log2 <- pmin(1,csiSurvCI$est*
                exp(qnorm(0.975)*csiSurvCI$std.err/csiSurvCI$est))
csiSurvCI$lower.loglog <- 1-(1-csiSurvCI$est)^(
                exp(qnorm(0.975)*csiSurvCI$std.err/((1-csiSurvCI$est)*log(1-csiSurvCI$est))))
csiSurvCI$upper.loglog <- 1-(1-csiSurvCI$est)^(
                 exp(-qnorm(0.975)*csiSurvCI$std.err/((1-csiSurvCI$est)*log(1-csiSurvCI$est))))
csiSurvCI$lower.loglog2 <- csiSurvCI$est^(
                exp(-qnorm(0.975)*csiSurvCI$std.err/(csiSurvCI$est*log(csiSurvCI$est))))
csiSurvCI$upper.loglog2 <- csiSurvCI$est^(
                 exp(qnorm(0.975)*csiSurvCI$std.err/(csiSurvCI$est*log(csiSurvCI$est))))
csiSurvCI$lower.lin <- pmax(0,csiSurvCI$est-qnorm(0.975)*csiSurvCI$std.err)
csiSurvCI$upper.lin <- pmin(1,csiSurvCI$est+qnorm(0.975)*csiSurvCI$std.err)

## FIGURE 2.8
par(las=1)
plot(c(0,est)~c(0,time), data=csiSurvCI, lwd=3, xlim=c(0,3), xaxs="i", ylab="",
                xlab="time since HIV infection (years)", ylim=c(0,0.10), type="s")
lines(upper.log~time, data=csiSurvCI, type="s", lty="dotted", lwd=3)
lines(lower.log~time, data=csiSurvCI, type="s", lty="dotted", lwd=3)
lines(upper.log2~time, data=csiSurvCI, type="s", lty="dotted", lwd=3, col="grey70")
lines(lower.log2~time, data=csiSurvCI, type="s", lty="dotted", lwd=3, col="grey70")
lines(upper.loglog~time, data=csiSurvCI, type="s")
lines(lower.loglog~time, data=csiSurvCI, type="s")
lines(upper.loglog2~time, data=csiSurvCI, type="s", col="grey70")
lines(lower.loglog2~time, data=csiSurvCI, type="s", col="grey70")
lines(upper.lin~time, data=csiSurvCI, type="s", lty="dashed")
lines(lower.lin~time, data=csiSurvCI, type="s", lty="dashed")


### the etm package
###################
library(etm)

## compute estimates
csiEtm <- etmCIF(Surv(time=time,event=status!=0)~1, data=aidssi, etype=status, failcode=c(1,2))
## brief summary, not very useful
csiEtm
## summary of estimates at six time points (error if options(stringsAsFactors=FALSE)
summary(csiEtm)
## summary at all time points if assigned to object
summEtm <- summary(csiEtm)
## detailed information on first event type (AIDS)
summEtm[[1]][["CIF 1"]]
## alternative event specification, via factor variable "cause"
summEtm <- summary(etmCIF(Surv(time=time,event=status!=0)~1, data=aidssi, etype=cause,
                          failcode=c("AIDS","SI")))
summEtm[[1]][["CIF AIDS"]]
## confidence intervals on another scale, e.g.
summary(csiEtm, ci.fun="log")

## another way to obtain the estimate
trprob(csiEtm[[1]], tr.choice="0 2")
## estimate and variance at specific time points
trprob(csiEtm[[1]], tr.choice="0 2", timepoints=seq(2,10,by=2))
trcov(csiEtm[[1]], tr.choice="0 2", timepoints=seq(2,10,by=2))

## FIGURE 2.4
par(las=1)
plot(survfit(Surv(time,status==2)~1, data=aidssi), xlim=c(0,13), mark.time=FALSE, col="grey50",
     conf.int=FALSE, xlab="time since HIV infection", xaxs="i")
lines(survfit(Surv(time,status==1)~1, data=aidssi), fun="event", mark.time=FALSE, col="grey50",
      conf.int=FALSE)
with(csiEtm[[1]], lines(time,1-est[1,3,], lwd=2)) # plot.etm cannot use the survival scale
lines(csiEtm[[1]], tr.choice="0 1", lwd=2)
text(8.9, 0.18, "time to AIDS")
text(8.9, 0.75, "time to SI")

## FIGURE 2.5
plot(survfit(Surv(time,status>0)~1, data=aidssi), xlim=c(0,13), mark.time=FALSE, conf.int=FALSE,
     xlab="time since HIV infection", fun="event", lwd=2, xaxs="i")
lines(csiEtm[[1]], tr.choice="0 1", lwd=2)
text(8.9, 0.15, "time to AIDS")
text(8.9, 0.45, "time to SI")


## create data frame with estimate and confidence interval on different scales
csiEtmCI <- data.frame(est=trprob(csiEtm[[1]], tr.choice="0 2"))
csiEtmCI$time <- as.numeric(row.names(csiEtmCI))
csiEtmCI$std.err <- sqrt(trcov(csiEtm[[1]], tr.choice="0 2"))
csiEtmCI$lower.log <- pmax(0,1-(1-csiEtmCI$est)*
                 exp(qnorm(0.975)*csiEtmCI$std.err/(1-csiEtmCI$est)))
csiEtmCI$upper.log <- 1-(1-csiEtmCI$est)*
                exp(-qnorm(0.975)*csiEtmCI$std.err/(1-csiEtmCI$est))
csiEtmCI$lower.log2 <- csiEtmCI$est*
                 exp(-qnorm(0.975)*csiEtmCI$std.err/csiEtmCI$est)
csiEtmCI$upper.log2 <- pmin(1,csiEtmCI$est*
                exp(qnorm(0.975)*csiEtmCI$std.err/csiEtmCI$est))
csiEtmCI$lower.loglog <- 1-(1-csiEtmCI$est)^(
                exp(qnorm(0.975)*csiEtmCI$std.err/((1-csiEtmCI$est)*log(1-csiEtmCI$est))))
csiEtmCI$upper.loglog <- 1-(1-csiEtmCI$est)^(
                 exp(-qnorm(0.975)*csiEtmCI$std.err/((1-csiEtmCI$est)*log(1-csiEtmCI$est))))
csiEtmCI$lower.loglog2 <- csiEtmCI$est^(
                exp(-qnorm(0.975)*csiEtmCI$std.err/(csiEtmCI$est*log(csiEtmCI$est))))
csiEtmCI$upper.loglog2 <- csiEtmCI$est^(
                 exp(qnorm(0.975)*csiEtmCI$std.err/(csiEtmCI$est*log(csiEtmCI$est))))
csiEtmCI$lower.lin <- pmax(0,csiEtmCI$est-qnorm(0.975)*csiEtmCI$std.err)
csiEtmCI$upper.lin <- pmin(1,csiEtmCI$est+qnorm(0.975)*csiEtmCI$std.err)


## FIGURE 2.8
par(las=1)
plot(c(0,est)~c(0,time), data=csiEtmCI, lwd=3, xlim=c(0,3), xaxs="i", ylab="",
                xlab="time since HIV infection (years)", ylim=c(0,0.10), type="s")
lines(upper.log~time, data=csiEtmCI, type="s", lty="dotted", lwd=3)
lines(lower.log~time, data=csiEtmCI, type="s", lty="dotted", lwd=3)
lines(upper.log2~time, data=csiEtmCI, type="s", lty="dotted", lwd=3, col="grey70")
lines(lower.log2~time, data=csiEtmCI, type="s", lty="dotted", lwd=3, col="grey70")
lines(upper.loglog~time, data=csiEtmCI, type="s")
lines(lower.loglog~time, data=csiEtmCI, type="s")
lines(upper.loglog2~time, data=csiEtmCI, type="s", col="grey70")
lines(lower.loglog2~time, data=csiEtmCI, type="s", col="grey70")
lines(upper.lin~time, data=csiEtmCI, type="s", lty="dashed")
lines(lower.lin~time, data=csiEtmCI, type="s", lty="dashed")

## In version >= 0.6-2 there is a function ggtransfo that transforms estimates to format for use in ggplot2
csiEtm.gg <- ggtransfo(csiEtm[[1]])
head(csiEtm.gg)
library(ggplot2)
ggplot(csiEtm.gg, aes(time,P,group=trans)) + geom_step(aes(colour=trans)) +
    geom_rect(aes(xmin=time,xmax=timemax,ymin=lower,ymax=upper,fill=trans), alpha=0.5)

##################################
## The weighted product limit form
##################################
library(mstate) # if not attached already

## create data set with weights
aidssi.w <- crprep("time", "cause", data=aidssi, trans=c("AIDS","SI"), cens="event-free",
                   id="patnr", keep="ccr5")
## alternative specification by values instead of column names
aidssi.w <- with(aidssi, crprep(time, cause, trans=c("AIDS","SI"), cens="event-free",
                                id=patnr, keep=ccr5))

## calculate PL-form for SI as event
survfit(Surv(Tstart,Tstop,status=="SI")~1, data=aidssi.w, weights=weight.cens,
        subset=failcode=="SI")
## both end points at the same time
csiPL <- survfit(Surv(Tstart,Tstop,status==failcode)~failcode, data=aidssi.w, weights=weight.cens)
## summary
summary(csiPL["failcode=SI"], times=seq(2,10,by=2))
## select by number of stratum
csiPL$strata
summary(csiPL[2], times=seq(2,10,by=2))

## FIGURE 2.9, comparison of CI's: Gaynor/Betensky and PL-based
plot(csiSurv[2], mark.time=FALSE, lwd=3, xlim=c(0,13), xaxs="i",
     xlab="time since HIV infection (years)", ylim=c(0,0.44), conf.int=FALSE)
lines(csiSurv[2], fun="event", mark.time=FALSE, conf.int="only", lty="solid", lwd=4, col="grey80")
lines(csiPL[2], fun="event", mark.time=FALSE, conf.int="only", lty="solid", lwd=1)
## The log scale as used in survfit has not been implemented in the plot or lines function in etm
## We can use the CI as calculated and stored in csiEtmCI, gives same result as
lines(upper.log~time, data=csiEtmCI, type="s", lwd=1, col="red")
lines(lower.log~time, data=csiEtmCI, type="s", lwd=1, col="red")

###########################################
## Using the finegray function from survfit
###########################################

aidssi.w2 <- finegray(Surv(time,status,type="mstate")~., data=aidssi)
aidssi.w2 <- finegray(Surv(time,relevel(cause,"event-free"))~., aidssi, etype="AIDS")

aidssi.w2 <- finegray(Surv(time,status,type="mstate")~., data=aidssi, etype=2)
aidssi.w2 <- finegray(Surv(time,relevel(cause,"event-free"))~., aidssi, etype="SI")

#################
## Log-rank tests
#################

## log-rank test on cause-specific hazards
survdiff(Surv(time,status==1)~ccr5, data=aidssi) # AIDS
survdiff(Surv(time,status==2)~ccr5, data=aidssi) # SI
## same result based on score test
lr.test <- coxph(Surv(time,status==1)~ccr5, data=aidssi)$score
c(lr.test,1-pchisq(lr.test,1))
lr.test <- coxph(Surv(time,status==2)~ccr5, data=aidssi)$score
c(lr.test,1-pchisq(lr.test,1))

## log rank test on subdistribution hazard
## cmprsk package
install.packages("cmprsk") # if not yet installed
library(cmprsk)
with(aidssi, cuminc(time, status, group=ccr5)$Tests)

## via estimate of subdistribution hazard, with CCR5 specific weights
aidssi.wCCR <- crprep("time", "cause", data=aidssi, trans=c("AIDS","SI"), cens="event-free",
                      strata="ccr5")
## test based on score test from proportional subdistribution hazards model
test.AIDS <- coxph(Surv(Tstart,Tstop,status=="AIDS")~ccr5, data=aidssi.wCCR, weights=weight.cens,
                   subset=failcode=="AIDS")
test.SI <- coxph(Surv(Tstart,Tstop,status=="SI")~ccr5, data=aidssi.wCCR, weights=weight.cens,
                 subset=failcode=="SI")
## score test statistic and p-value
c(test.AIDS$score, 1-pchisq(test.AIDS$score,1)) # AIDS
c(test.SI$score, 1-pchisq(test.SI$score,1))     # SI

## FIGURE 2.11: plot of cause-specific and subdistribution cumulative hazard to SI by CCR5
par(mfrow=c(1,2))
plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5),
             subset(aidssi.wCCR,failcode=="SI"&count==1)),
     mark.time=FALSE, lwd=c(2,2), lty=c(1,2), fun="cumhaz", xlim=c(0,13), ylim=c(0,0.7),
     xlab="time since HIV infection (years)")
plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"),
             weights=weight.cens), mark.time=FALSE, lwd=c(2,2), lty=c(1,2), fun="cumhaz",
     xlim=c(0,13), ylim=c(0,0.7), xlab="time since HIV infection (years)")
par(mfrow=c(1,1))

rm(csiSurvCI,csiEtmCI,csiEtm.gg,test.AIDS,test.SI,lr.test)

###########################################################################
## Chapter 3, Section 3.7
###########################################################################

## We first need to create two extra columns for the state "3 AIDS/SI" in FIGURE 3.3
data(aidssi2)
aidssi2 <- aidssi2[,c(1,2,3,4,5,6,5,6,7:10)] # duplicate info on SI state
names(aidssi2)[c(7,8)] <- c("siaids.time","siaids.stat") # rename
tmp.si.aids <- aidssi2$si.time < aidssi2$aids.time # SI switch before AIDS?
## In "1 AIDS" and "2 SI", we need to set the status column to zero if the other happened first
aidssi2 <- within(aidssi2, {
    siaids.stat <- aids.stat*si.stat # 1 if both occurred
    siaids.time <- pmax(aids.time,si.time)
    aids.stat[tmp.si.aids] <- 0  # change to 0 if SI occurs first
    si.stat[!tmp.si.aids] <- 0 # change to 0 if AIDS occurs first
})
## If individuals had an SI switch before AIDS diagnosis, they never reached state "1 AIDS". Therefore, the
## column aids.time needs to be set at the end of the follow-up. The same holds for the six individuals that
## developed AIDS before having a switch to SI
aidssi2$aids.time.orig <- aidssi2$aids.time
aidssi2$si.time.orig <- aidssi2$si.time
aidssi2$aids.time[aidssi2$aids.stat==0] <- aidssi2$death.time[aidssi2$aids.stat==0]
aidssi2$si.time[aidssi2$si.stat==0] <- aidssi2$death.time[aidssi2$si.stat==0]
rm(tmp.si.aids)



##################
## The etm package
##################

library(etm)

## Specify allowed transitions
trans.etm <- matrix(FALSE,nrow=5,ncol=5)
trans.etm[rbind( c(1,2), c(1,3), c(2,4), c(2,5), c(3,4) ,c(4,5) )] <- TRUE
trans.etm
dimnames(trans.etm) <- list(c("HIV","AIDS", "SI", "AIDS/SI", "death"),
                       c("HIV","AIDS", "SI", "AIDS/SI", "death")) # not required to create dimnames

## Create data set
aidssi.etm <- etmprep(time=c(NA,"aids.time","si.time","siaids.time","death.time"),
                   status=c(NA,"aids.stat","si.stat","siaids.stat","death.stat"),
                   data=aidssi2, tra=trans.etm, cens.name="cens",
                   state.names=c("HIV", "AIDS","SI","AIDS/SI", "death"),
                   start=list(time=aidssi2$entry.time,state=rep("HIV",nrow(aidssi2))))

## The computations

## First cumulative transition hazards
library(mvna) #  best estimated via mvna package
NA.etm <- mvna(aidssi.etm, state.names=c("HIV", "AIDS","SI", "AIDS/SI", "death"), tra=trans.etm, cens.name="cens")
summary(NA.etm)
## alternative, only for each transition separately
library(survival)
CumHaz.2to3 <- survfit(coxph(Surv(entry,exit,to=="AIDS/SI")~1, data=subset(aidssi.etm,from=="SI")))

## Transition probabilities
Prob.etm <- etm(aidssi.etm, state.names=c("HIV", "AIDS","SI", "AIDS/SI", "death") , tra=trans.etm,
                cens.name="cens", s=0)

## Transition probabilities starting at 4 years
Prob.etm.s4 <- etm(aidssi.etm, c("HIV", "AIDS","SI", "AIDS/SI", "death") , trans.etm, "cens", s=4)

## Summarizing results via summary.etm
summary(Prob.etm, ci.fun="log") # summary at six time points
Summ.Prob.etm <- summary(Prob.etm, ci.fun="log") # summary at all time points
Summ.Prob.etm[["SI AIDS/SI"]][seq(100,400,by=100),]
Summ.Prob.etm[["HIV HIV"]][seq(100,400,by=100),]

## Summarizing results via trprob.etm and trcov.etm
## transition probability for a non-direct transition, e.g. from HIV to death
Prob.hiv.death <- trprob(Prob.etm, tr.choice=c("HIV death"))
Prob.hiv.death[seq(100,400,by=100)] # just four values
# Create data frame with the estimates
Prob.hiv.death <- data.frame(time=as.numeric(names(Prob.hiv.death)), prob=Prob.hiv.death)
row.names(Prob.hiv.death) <- 1:nrow(Prob.hiv.death)
tmp <- trcov(Prob.etm, tr.choice=c("HIV death"))
Prob.hiv.death$lower.ci <- pmax(0,1-(1-Prob.hiv.death$prob)*
                exp(qnorm(0.975)*sqrt(tmp)/(1-Prob.hiv.death$prob)))
Prob.hiv.death$upper.ci <- 1-(1-Prob.hiv.death$prob)*
                 exp(-qnorm(0.975)*sqrt(tmp)/(1-Prob.hiv.death$prob))

## Probability to live with AIDS, combined state
Prob.hiv.aids <- trprob(Prob.etm, "HIV AIDS") + trprob(Prob.etm, "HIV AIDS/SI")
Prob.hiv.aids <- data.frame(time=as.numeric(names(Prob.hiv.aids)),prob=Prob.hiv.aids)
row.names(Prob.hiv.aids) <- 1:nrow(Prob.hiv.aids)
## Confidence intervals, using formula (1.20)
Var.hiv.aids <- trcov(Prob.etm, "HIV AIDS") + trcov(Prob.etm, "HIV AIDS/SI") + 2*trcov(Prob.etm, c("HIV AIDS","HIV AIDS/SI"))
Prob.hiv.aids$lower.ci <- pmax(0,1-(1-Prob.hiv.aids$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob)))
Prob.hiv.aids$upper.ci <- 1-(1-Prob.hiv.aids$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob))


## Plotting the results

## Directly via plot command from etm package. Aalen-Johansen estimate in FIGURE 3.9 is obtained via
plot(Prob.etm, tr.choice=c("HIV death"), lwd=2, conf.int=TRUE, ci.fun="log", legend=FALSE, ylim=c(0,0.75))
# we add the confidence intervals on the other log scale:
lines(lower.ci~time, data=Prob.hiv.death, col="red")
lines(upper.ci~time, data=Prob.hiv.death, col="red")

## FIGURE 3.4: Nelson-Aalen estimates
plot(NA.etm, tr.choice=c("HIV SI","HIV AIDS"), lwd=c(3,3), lty=c(2,1), col=c(rep("black",2)), ylim=c(0,7),
     xlim=c(0,13), xlab="time since HIV infection (years)", ylab="cumulative transition hazard", legend=FALSE)
lines(NA.etm, tr.choice=c("SI AIDS/SI","AIDS AIDS/SI"), lwd=c(2,2), lty=c(2,1), col=c(rep("black",2)))
lines(NA.etm, tr.choice=c("AIDS/SI death"), col=c("grey50"))
lines(NA.etm, tr.choice=c("AIDS death"), lwd=2, col=c("grey50"))
legend(1,6, c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"),
       lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n")
# the same estimates are obtained using the survival package, for example
lines(CumHaz.2to3, fun="cumhaz", col="red", mark.time=FALSE, conf.int=FALSE)

## FIGURE 3.5: transition probabilities for all direct transitions
plot(Prob.etm , c("HIV SI","HIV AIDS"), lwd=c(3,3), lty=c(2,1), col=c(rep("black",2)), legend=FALSE,
     xlim=c(0,13), xlab="time since HIV infection (years)", ylab="transition probability")
lines(Prob.etm, tr.choice=c("SI AIDS/SI","AIDS AIDS/SI"), lwd=c(2,2), lty=c(2,1), col=c(rep("black",2)))
lines(Prob.etm, tr.choice=c("AIDS/SI death"), col=c("grey50"))
lines(Prob.etm, tr.choice=c("AIDS death"), lwd=2, col=c("grey50"))
legend(7,0.65,c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"),
       lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n")
## FIGURE 3.6 is similar, but uses Prob.etm.s4

## FIGURE 3.7
plot(Prob.etm, c("HIV AIDS"), lty=2, lwd=3, legend=FALSE, xlim=c(0,13),
     xlab="time since HIV infection (years)", ylab="transition probability", ylim=c(0,0.6))
lines(Prob.etm, tr.choice=c("HIV SI"), lwd=3, lty=3)
lines(Prob.etm, tr.choice=c("HIV AIDS/SI"), lwd=3, col="grey50")
lines(Prob.etm,tr.choice=c("HIV death"), col="black")
lines(Prob.etm,tr.choice=c("HIV HIV"),lwd=3, col="black")
legend(0.5,0.5,c("0 HIV","1 AIDS","2 SI","3 AIDS/SI","4 death"), lwd=c(3,3,3,3,1), lty=c(1,2,3,1,1),
       col=c(rep("black",3),"grey50","black"), bty="n")

## FIGURE 3.8 (note: CI in FIGURE 3.8 slightly different, were based on the log scale in formula (1.23))
plot(prob~time, data=Prob.hiv.aids, type="s", xlab="time since HIV infection (years)",
     ylab="probability to be alive with AIDS", xlim=c(0,13), ylim=c(0,0.22), xaxs="i", lwd=3)
lines(lower.ci~time, data=Prob.hiv.aids, type="s", lty=3)
lines(upper.ci~time, data=Prob.hiv.aids, type="s", lty=3)
lines(Prob.etm, tr.choice=c("HIV AIDS"), lwd=2)

## FIGURE 3.9: compare Kaplan-Meier and Aalen-Johansen estimator
## We create some temporary variables that allow us to make a gray confidence band
tmp <- survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2)
x1 <- rep(tmp$time, each=2)[-1]
x2 <- rev(rep(tmp$time, each=2)[-1])
y1 <- 1-rep(tmp$lower,each=2)
y1 <- y1[-length(y1)]
y2 <- 1-rep(tmp$upper,each=2)
y2 <- rev(y2[-length(y2)])
## Creating the plot
par(las=1,mar=c(5.1,7.1,1,1))
plot(1,1, xlim=c(0, 13), xlab="time since HIV infection (years)", ylab="transition probability to death",
     ylim=c(0,0.7), type="n", xaxs="i")
polygon(c(x1,x2), c(y1,y2), density=-1, col=rgb(0.8,0.8,0.8, alpha=0.7), border=NA)
lines(survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2), fun="event", lwd=2, mark.time=FALSE,
      col="grey50", conf=FALSE)
lines(prob~time, data=Prob.hiv.death, lwd=2, type="s")
lines(lower.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s")
lines(upper.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s")
rm(x1,x2,y1,y2,tmp)

## FIGURE 3.11: State-AIDS entry distribution
# First redefine the model
trans.etm.AIDS <- matrix(FALSE,nrow=4,ncol=4)
trans.etm.AIDS[rbind( c(1,2), c(1,3),  c(3,4) )] <- TRUE
dimnames(trans.etm.AIDS) <- list(c("HIV","AIDS", "SI", "AIDS/SI"),
                       c("HIV","AIDS", "SI", "AIDS/SI"))
# Computation of transition probabilities (similar as above)
aidssi.etm.AIDS <- subset(aidssi.etm, !((from=="AIDS")|(from=="AIDS/SI")|(to=="death")))
Prob.etm.AIDS <- etm(aidssi.etm.AIDS, state.names=c("HIV", "AIDS","SI", "AIDS/SI") , tra=trans.etm.AIDS, cens.name="cens", s=0)
Prob.hiv.aids2 <- trprob(Prob.etm.AIDS, "HIV AIDS") + trprob(Prob.etm.AIDS, "HIV AIDS/SI")
Prob.hiv.aids2 <- data.frame(time=as.numeric(names(Prob.hiv.aids2)),prob=Prob.hiv.aids2)
row.names(Prob.hiv.aids2) <- 1:nrow(Prob.hiv.aids2)
Var.hiv.aids <- trcov(Prob.etm.AIDS, "HIV AIDS") + trcov(Prob.etm.AIDS, "HIV AIDS/SI") + 2*trcov(Prob.etm.AIDS, c("HIV AIDS","HIV AIDS/SI"))
Prob.hiv.aids2$lower.ci <- pmax(0,1-(1-Prob.hiv.aids2$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob)))
Prob.hiv.aids2$upper.ci <-  1-(1-Prob.hiv.aids2$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob))
## Plotting
plot(prob~time, data=Prob.hiv.aids2, type="s", xlab="time since HIV infection (years)",
     ylab="probability to develop AIDS", xlim=c(0,13), ylim=c(0,0.8), xaxs="i", lwd=3)
lines(prob~time, data=Prob.hiv.aids,type="s", col="grey")
# add Kaplan-Meier
lines(survfit(Surv(aids.time.orig, pmax(aids.stat,siaids.stat))~1, data=aidssi2),
      fun="event", mark.time=FALSE, col="red")


#####################
## The msSurv package
#####################

## code to download and install the graph package
source("http://bioconductor.org/biocLite.R")
biocLite("graph")

library(msSurv)

## Specify allowed transitions
Nodes <- c("HIV","AIDS","SI","AIDS/SI","death")
Edges <- list("HIV" = list(edges=c("AIDS","SI")),
              "AIDS" = list(edges=c("AIDS/SI","death")),
              "SI" = list(edges=c("AIDS/SI")),
              "AIDS/SI" = list(edges=c("death")),
              "death" = list(edges=NULL))
ms.Diag <- new("graphNEL", nodes=Nodes, edgeL=Edges, edgemode="directed")

## Create data set, use aidssi.etm as basis
aidssi.msSurv <- aidssi.etm
names(aidssi.msSurv)[1:5] <- c("id","start","stop","start.stage","end.stage") # fixed names
# Make state columns into character format. Censored data are specified via value "0".
aidssi.msSurv$start.stage <- as.character(aidssi.msSurv$start.stage)
aidssi.msSurv$end.stage <- as.character(aidssi.msSurv$end.stage)
aidssi.msSurv[aidssi.msSurv$end.stage=="cens","end.stage"] <- "0"


## The computations

## Cumulative hazards and transition probabilities
Prob.msSurv <- msSurv(aidssi.msSurv, ms.Diag, LT=TRUE)

## Summarizing results
## Same time points as seq(100,400,by=100) in summary.etm above, for comparison
## Estimates are the same, CI's are different because the scale is different
summary(Prob.msSurv, stateocc=FALSE, dist=FALSE, time=c(1.9903052703628,3.64691649555084,5.4784,6.82859274469547), ci.fun="log")

## Transition probability from HIV to death
trProb.est <- AJs(Prob.msSurv)
trProb.var <- cov.AJs(Prob.msSurv)
Prob.hiv.death <- data.frame(time=et(Prob.msSurv),
               prob=trProb.est[1,5,], var=trProb.var["HIV death","HIV death",])
row.names(Prob.hiv.death) <- 1:nrow(Prob.hiv.death)
Prob.hiv.death$lower.ci <- pmax(0,1-(1-Prob.hiv.death$prob)*
      exp(qnorm(0.975)*sqrt(Prob.hiv.death$var)/(1-Prob.hiv.death$prob)))
Prob.hiv.death$upper.ci <- 1-(1-Prob.hiv.death$prob)*
      exp(-qnorm(0.975)*sqrt(Prob.hiv.death$var)/(1-Prob.hiv.death$prob))

## Transition probabilities starting at s=4
## idx gives the indices of the event times after 4 years
idx <- which(4 <= et(Prob.msSurv) )
trProb.est4 <- array(0,dim=c(5,5,length(idx)))
trProb.est4[ , ,1] <- I.dA(Prob.msSurv)[ , ,idx[1]]
for (i in 2:length(idx)) trProb.est4[ , ,i] <-
         trProb.est4[ , ,i-1] %*% I.dA(Prob.msSurv)[ , ,idx[i]]
dimnames(trProb.est4) <- list(from=c("HIV","AIDS","SI","AIDS/SI","death"),
                              to=c("HIV","AIDS","SI","AIDS/SI","death"), time=et(Prob.msSurv)[idx])

## Check via Pst whether results are the same, e.g for s=4, t=10
Pst(Prob.msSurv, s=4, t=10)
sum(et(Prob.msSurv)[idx]<10) # 306
trProb.est4[,,306]

## Probability to live with AIDS, combined state
Prob.hiv.aids <- AJs(Prob.msSurv)[1,2,] + AJs(Prob.msSurv)[1,4,]
Prob.hiv.aids <- data.frame(time=et(Prob.msSurv), prob=Prob.hiv.aids)
row.names(Prob.hiv.aids) <- 1:nrow(Prob.hiv.aids)
Var.hiv.aids <- cov.AJs(Prob.msSurv)["HIV AIDS","HIV AIDS",] + cov.AJs(Prob.msSurv)["HIV AIDS/SI","HIV AIDS/SI",]  + 2*cov.AJs(Prob.msSurv)["HIV AIDS","HIV AIDS/SI",]
Prob.hiv.aids$lower.ci <- pmax(0,1-(1-Prob.hiv.aids$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob)))
Prob.hiv.aids$upper.ci <- 1-(1-Prob.hiv.aids$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob))

## Fixed horizon transition probabilities ending at t=10
idx <- which( et(Prob.msSurv)>=2 & et(Prob.msSurv) <=10)
n.time <- length(idx)
FixedHor10 <- array(0,dim=c(5,5,n.time))
FixedHor10[,,n.time] <- I.dA(Prob.msSurv)[,,idx[n.time]]
for (i in (n.time-1):1) FixedHor10[,,i] <-
              I.dA(Prob.msSurv)[,,idx[i]]%*%FixedHor10[,,i+1]
dimnames(FixedHor10) <- list(from=c("HIV","AIDS","SI","AIDS/SI","death"), to=c("HIV","AIDS","SI","AIDS/SI","death"),time=c(et(Prob.msSurv)[idx-1]))

## Plotting the results
## all state occupation probabilities, directly via plot command from msSurv package
plot(Prob.msSurv, plot.type="stateocc")

## FIGURE 3.4: Nelson-Aalen estimates
par(las=1)
plot(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[1,3,]), lwd=3, lty=2, ylim=c(0,7), xlim=c(0,13),
     xlab="time since HIV infection (years)", ylab="cumulative transition hazard", type="s")
lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[1,2 ,]), lwd=3, type="s")
lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[2,4 ,]), lwd=2, type="s")
lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[3,4 ,]), lwd=2, type="s", lty=2)
lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[4,5 ,]), col=c("grey50"), type="s")
lines(et(Prob.msSurv),cumsum(I.dA(Prob.msSurv)[2,5 ,]), lwd=2, col=c("grey50"), type="s")
legend(1,6, c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"),
       lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n")
## an alternative way is via
## cumsum(dNs(Prob.msSurv)[,"dN 1 2"]/Ys(Prob.msSurv)[,"y 1"]) etc.
## the same estimates can be obtained using the survival package, for example
CumHaz.2to3 <- survfit(coxph(Surv(start,stop,end.stage=="AIDS/SI")~1, data=subset(aidssi.msSurv,start.stage=="SI")))
lines(CumHaz.2to3,fun="cumhaz",col="red",mark.time=FALSE,conf.int=FALSE)

## FIGURE 3.5: transition probabilities for all direct transitions
par(las=1)
plot(et(Prob.msSurv), AJs(Prob.msSurv)[1,2 ,], lwd=3, xlim=c(0,13), xlab="time since HIV infection (years)",
     ylab="transition probability", type="s", ylim=c(0,1))
lines(et(Prob.msSurv), AJs(Prob.msSurv)[1,3 ,], lwd=3, lty=2, type="s")
lines(et(Prob.msSurv), AJs(Prob.msSurv)[2,4 ,], lwd=2, type="s")
lines(et(Prob.msSurv), AJs(Prob.msSurv)[3,4 ,], lwd=2, type="s", lty=2)
lines(et(Prob.msSurv), AJs(Prob.msSurv)[2,5 ,], lwd=2, col=c("grey50"), type="s")
lines(et(Prob.msSurv), AJs(Prob.msSurv)[4,5 ,], col=c("grey50"), type="s")
legend(7,0.65,c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"),
       lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n")
## FIGURE 3.6 is similar, but uses trProb.est4

## FIGURE 3.7
par(las=1)
plot(et(Prob.msSurv),AJs(Prob.msSurv)[1,2,],lty=2,lwd=3,xlim=c(0,13),xlab="time since HIV infection (years)",ylab="transition probability", ylim=c(0,0.6),type="s")
lines(et(Prob.msSurv),AJs(Prob.msSurv)[1,3,],lwd=3,lty=3)
lines(et(Prob.msSurv),AJs(Prob.msSurv)[1,4,],lwd=3,lty=1,col="grey50")
lines(et(Prob.msSurv),AJs(Prob.msSurv)[1,5,],lwd=1,lty=1,col="black")
lines(et(Prob.msSurv),AJs(Prob.msSurv)[1,1,],lwd=3,lty=1,col="black")
legend(0.5,0.5, c("0 HIV","1 AIDS","2 SI","3 AIDS/SI","4 death"), lwd=c(3,3,3,3,1), lty=c(1,2,3,1,1),
       col=c(rep("black",3),"grey50","black"), bty="n")

## FIGURE 3.8 (note: CI in FIGURE 3.8 slightly different, were based on the log scale in formula (1.23))
plot(prob~time, data=Prob.hiv.aids, type="s", xlab="time since HIV infection (years)",
     ylab="probability to be alive with AIDS", xlim=c(0,13), ylim=c(0,0.22), xaxs="i", lwd=3)
lines(lower.ci~time, data=Prob.hiv.aids, type="s", lty=3)
lines(upper.ci~time, data=Prob.hiv.aids, type="s", lty=3)
lines(et(Prob.msSurv), AJs(Prob.msSurv)[1,2 ,], lwd=2, type="s")

## FIGURE 3.9: compare Kaplan-Meier and Aalen-Johansen estimator
## We create some temporary variables that allow us to make a gray confidence band
tmp <- survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2)
x1 <- rep(tmp$time, each=2)[-1]
x2 <- rev(rep(tmp$time, each=2)[-1])
y1 <- 1-rep(tmp$lower,each=2)
y1 <- y1[-length(y1)]
y2 <- 1-rep(tmp$upper,each=2)
y2 <- rev(y2[-length(y2)])
## Creating the plot
par(las=1,mar=c(5.1,7.1,1,1))
plot(1,1, xlim=c(0, 13), xlab="time since HIV infection (years)", ylab="transition probability to death",
     ylim=c(0,0.7), type="n", xaxs="i")
polygon(c(x1,x2), c(y1,y2), density=-1, col=rgb(0.8,0.8,0.8, alpha=0.7), border=NA)
lines(survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2), fun="event", lwd=2, mark.time=FALSE,
      col="grey50", conf=FALSE)
lines(prob~time, data=Prob.hiv.death, lwd=2, type="s")
lines(lower.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s")
lines(upper.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s")
rm(x1,x2,y1,y2,tmp)

## FIGURE 3.10, fixed horizon prediction
plot(et(Prob.msSurv)[idx], FixedHor10[1,5,], type="s", ylim=c(0,1), xaxs="i", lwd=2, xlim=c(2,10),
     xlab="time since HIV infection (years)", ylab="10 years transition probability",
     cex.axis=0.9)
lines(et(Prob.msSurv)[idx], FixedHor10[1,1,], type="s", lty=3, lwd=2)
lines(et(Prob.msSurv)[idx], FixedHor10[3,5,], type="s")
lines(et(Prob.msSurv)[idx], FixedHor10[3,3,], type="s", lty=2)
lines(et(Prob.msSurv)[idx], FixedHor10[1,2,]+FixedHor10[1,4,], type="s", col="grey", lwd=2)
lines(et(Prob.msSurv)[idx], FixedHor10[3,4,], type="s", col="grey", lwd=1)
legend("top",c("death from HIV","death from SI","still in HIV state","still in SI state","AIDS from HIV","AIDS from SI"),
       lwd=c(2,1,2,1,2,1), lty=c(1,1,3,2,1,1), col=c(rep("black",4), rep("grey",2)), bty="n")

## FIGURE 3.11: State-AIDS entry distribution
# First redefine the model
Nodes <- c("HIV","AIDS","SI","AIDS/SI")
Edges <- list("HIV" = list(edges=c("AIDS","SI")),
              "AIDS" = list(edges=NULL),
              "SI" = list(edges=c("AIDS/SI")),
              "AIDS/SI" = list(edges=NULL))
ms.Diag.AIDS <- new("graphNEL", nodes=Nodes, edgeL=Edges, edgemode="directed")
# Calculation of transition probabilities
aidssi.msSurv.AIDS <- subset(aidssi.msSurv, !((start.stage=="AIDS")|(start.stage=="AIDS/SI")|(end.stage=="death")))
Prob.msSurv.AIDS <- msSurv(aidssi.msSurv.AIDS[,1:5], ms.Diag.AIDS, LT=TRUE)
Prob.hiv.aids2 <- AJs(Prob.msSurv.AIDS)[1,2,] + AJs(Prob.msSurv.AIDS)[1,4,]
Prob.hiv.aids2 <- data.frame(time=et(Prob.msSurv.AIDS), prob=Prob.hiv.aids2)
row.names(Prob.hiv.aids2) <- 1:nrow(Prob.hiv.aids2)
Var.hiv.aids <- cov.AJs(Prob.msSurv.AIDS)["HIV AIDS","HIV AIDS",] + cov.AJs(Prob.msSurv.AIDS)["HIV AIDS/SI","HIV AIDS/SI",]  + 2*cov.AJs(Prob.msSurv.AIDS)["HIV AIDS","HIV AIDS/SI",]
Prob.hiv.aids2$lower.ci <- pmax(0,1-(1-Prob.hiv.aids2$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob)))
Prob.hiv.aids2$upper.ci <- 1-(1-Prob.hiv.aids2$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob))
# Plotting
plot(prob~time, data=Prob.hiv.aids2, type="s", xlab="time since HIV infection (years)",
     ylab="probability to develop AIDS", xlim=c(0,13), ylim=c(0,0.8), xaxs="i", lwd=3)
lines(prob~time, data=Prob.hiv.aids, type="s", col="grey")
# add Kaplan-Meier
lines(survfit(Surv(aids.time.orig, pmax(aids.stat,siaids.stat))~1, data=aidssi2),
      fun="event", mark.time=FALSE, col="red")


#####################
## The mstate package
#####################

library(mstate)

## Specify allowed transitions
trans.mst <- transMat(x = list(c(2,3), c(4,5), 4, 5, c()),
             names = c("HIV", "AIDS", "SI", "AIDS/SI", "death"))
trans.mst

## create data set
aidssi.mst <- msprep(aidssi2, trans=trans.mst, time=c(NA,"aids.time","si.time","siaids.time","death.time"),
                     status=c(NA,"aids.stat","si.stat","siaids.stat","death.stat"),
                     start=list(time=aidssi2$entry.time,state=rep(1,nrow(aidssi2))))

## table with observed transitions
events(aidssi.mst)

## The computations
NA.surv <- coxph(Surv(Tstart,Tstop,status)~strata(trans), data=aidssi.mst)  # step 1
NA.mst <- msfit(NA.surv, vartype="greenwood", trans=trans.mst)              # step 2
Prob.mst <- probtrans(NA.mst, predt=0, method="greenwood")                  # step 3
## Transition probabilities starting at s=4
Prob.mst4 <- probtrans(NA.mst, predt=4, method="greenwood")
## Fixed horizon transition probabilities ending at t=10
Prob.mst.R <- probtrans(NA.mst, predt=10, direction="fixedhorizon")

## Summarizing results
summary(NA.mst)             # Nelson-Aalen
summary(Prob.mst)           # Aalen-Johansen
summary(Prob.mst, from=1)   # from state 1 only

## More flexibility with output Prob.mst from probtrans

## Transition probability from HIV, state 1, to death, state 5
Prob.hiv.death <- data.frame(time=Prob.mst[[1]]$time, prob=Prob.mst[[1]]$pstate5,
                                                     se=Prob.mst[[1]]$se5)
Prob.hiv.death$upper.ci <- 1-(1-Prob.hiv.death$prob)*
                exp(-qnorm(0.975)*Prob.hiv.death$se/(1-Prob.hiv.death$prob))
Prob.hiv.death$lower.ci <- pmax(0,1-(1-Prob.hiv.death$prob)*
                exp(qnorm(0.975)*Prob.hiv.death$se/(1-Prob.hiv.death$prob)))

## Probability to live with AIDS, combined state
## We need to redo the computations in order to store the covariance information as well
Prob.mst <- probtrans(NA.mst, predt=0, method="greenwood", covariance=TRUE)
Prob.hiv.aids <-
    data.frame(time=Prob.mst[[1]]$time, prob=Prob.mst[[1]]$pstate2+Prob.mst[[1]]$pstate4)
## Covariance information is stored in a separate component, which is a three-dimensional array.
## First two components define "from" and "to states; last component has the values
dimnames(Prob.mst[[6]][,,1]) # check how the elements are named
Var.hiv.aids <- Prob.mst[[1]]$se2^2 + Prob.mst[[1]]$se4^2 + 2*Prob.mst[[6]]["from1to2","from1to4",]
Prob.hiv.aids$lower.ci <- pmax(0,1-(1-Prob.hiv.aids$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob)))
Prob.hiv.aids$upper.ci <- 1-(1-Prob.hiv.aids$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids$prob))

## Plotting the results
plot(Prob.mst, type="single", lwd=c(3,3,2,2,2,1),
     lty=c(1,2,2,1,1,1), col=c(rep("black",4), rep("grey50",2))) # all transition probabilities from state 1, HIV
## another display format
plot(Prob.mst, type="filled", ord=1:5)

## FIGURE 3.4: Nelson-Aalen estimates
## With stacked format is easy via standard survival code
par(las=1)
plot(survfit(NA.surv), fun="cumhaz", mark.time=FALSE, lwd=c(3,3,2,2,2,1),
     lty=c(1,2,1,1,2,1),col=c(rep("black",3),"grey50","black","grey50"),ylim=c(0,7),xlim=c(0,13))
legend(1,6, c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"),
       lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n")
## Alternative: via plot function for msfit object (has a bug with respect to lwd and lty)
plot(NA.mst, lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",3),"grey50","black","grey50"),
     ylim=c(0,7), xlim=c(0,13),
     legend=c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"),
     bty="n", legend.pos=c(1,6))

## FIGURE 3.5: transition probabilities for all direct transitions
par(las=1)
plot(pstate2~time, data=Prob.mst[[1]], lwd=3, type="s", xlim=c(0,13),
                                       xlab="time since HIV infection (years)", ylim=c(0,1))

lines(pstate3~time, data=Prob.mst[[1]], lwd=3, lty=2, type="s")
lines(pstate4~time, data=Prob.mst[[2]], lwd=2, type="s")
lines(pstate4~time, data=Prob.mst[[3]], lwd=2, lty=2, type="s")
lines(pstate5~time, data=Prob.mst[[4]], type="s", col="grey50")
lines(pstate5~time, data=Prob.mst[[2]], lwd=2, type="s", col="grey50")
legend(7,0.65,c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"),
       lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",2),"black","grey50","black",
                                                   "grey50"), bty="n")
## FIGURE 3.6 is similar, but uses Prob.mst4

## FIGURE 3.7
plot(Prob.mst,from=1, type="single", xlab="time since HIV infection (years)",
     ylab="transition probability", xlim=c(0,13), ylim=c(0,0.6), lwd=c(3,3,3,3,1), lty=c(1,2,3,1,1),
     col=c(rep("black",3),"grey50","black","grey50"),
     legend=c("0 HIV","1 AIDS","2 SI","3 AIDS/SI","4 death"),
     legend.pos="left")

## FIGURE 3.8 (note: CI in FIGURE 3.8 slightly different, were based on the log scale in formula (1.23))
plot(prob~time, data=Prob.hiv.aids, type="s", xlab="time since HIV infection (years)",
     ylab="probability to be alive with AIDS", xlim=c(0,13), ylim=c(0,0.22), xaxs="i", lwd=3)
lines(lower.ci~time, data=Prob.hiv.aids, type="s", lty=3)
lines(upper.ci~time, data=Prob.hiv.aids, type="s", lty=3)
lines(pstate2~time, data=Prob.mst[[1]], lwd=2, type="s")

## FIGURE 3.9: compare Kaplan-Meier and Aalen-Johansen estimator
## We create some temporary variables that allow us to make a gray confidence band
tmp <- survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2)
x1 <- rep(tmp$time, each=2)[-1]
x2 <- rev(rep(tmp$time, each=2)[-1])
y1 <- 1-rep(tmp$lower,each=2)
y1 <- y1[-length(y1)]
y2 <- 1-rep(tmp$upper,each=2)
y2 <- rev(y2[-length(y2)])
## Creating the plot
par(las=1,mar=c(5.1,7.1,1,1))
plot(1,1, xlim=c(0, 13), xlab="time since HIV infection (years)",
     ylab="transition probability to death", ylim=c(0,0.7), type="n", xaxs="i")
polygon(c(x1,x2), c(y1,y2), density=-1, col=rgb(0.8,0.8,0.8, alpha=0.7), border=NA)
lines(survfit(Surv(entry.time,death.time,death.stat)~1,data=aidssi2), fun="event", lwd=2,
      mark.time=FALSE, col="grey50", conf=FALSE)
lines(prob~time, data=Prob.hiv.death, lwd=2, type="s")
lines(lower.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s")
lines(upper.ci~time, data=Prob.hiv.death, lwd=2, lty=3, type="s")
rm(x1,x2,y1,y2,tmp)

## FIGURE 3.10, fixed horizon prediction
plot(pstate5~time, data=Prob.mst.R[[1]], xaxs="i", type="s", ylim=c(0,1), lwd=2, xlim=c(2,10),
     xlab="time since HIV infection (years)", ylab="10 years transition probability")
lines(pstate1~time, data=Prob.mst.R[[1]], type="s", lty=3, lwd=2)
lines(pstate5~time, data=Prob.mst.R[[3]], type="s")
lines(pstate3~time, data=Prob.mst.R[[3]], type="s", lty=2)
lines(I(pstate2+pstate4)~time, data=Prob.mst.R[[1]], type="s", col="grey", lwd=2)
lines(pstate4~time, data=Prob.mst.R[[3]], type="s", col="grey")
legend("top",c("death from HIV","death from SI","still in HIV state","still in SI state","AIDS from HIV","AIDS from SI"),
       lwd=c(2,1,2,1,2,1), lty=c(1,1,3,2,1,1), col=c(rep("black",4), rep("grey",2)), bty="n")


## FIGURE 3.11: State-AIDS entry distribution
# First redefine the model
trans.mst.AIDS <- transMat(x = list(c(2,3), c(), 4, c()),
             names = c("HIV", "AIDS", "SI", "AIDS/SI"))
trans.mst.AIDS
# Calculation of transition probabilities
aidssi.mst.AIDS <- subset(aidssi.mst, !((trans==3)|(trans==4)|(trans==6)))
NA.surv <- coxph(Surv(Tstart,Tstop,status)~strata(trans), data=aidssi.mst.AIDS)
NA.mst <- msfit(NA.surv, vartype="greenwood", trans=trans.mst.AIDS)
Prob.mst.AIDS <- probtrans(NA.mst, predt=0, method="greenwood", covariance=TRUE)
Prob.hiv.aids2 <-
    data.frame(time=Prob.mst.AIDS[[1]]$time,
               prob=Prob.mst.AIDS[[1]]$pstate2+Prob.mst.AIDS[[1]]$pstate4)
Var.hiv.aids <- Prob.mst.AIDS[[1]]$se2^2 + Prob.mst.AIDS[[1]]$se4^2 + 2*Prob.mst.AIDS[[5]]["from1to2","from1to4",]
Prob.hiv.aids2$lower.ci <- pmax(0,1-(1-Prob.hiv.aids2$prob)*exp(qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob)))
Prob.hiv.aids2$upper.ci <- 1-(1-Prob.hiv.aids2$prob)*exp(-qnorm(0.975)*sqrt(Var.hiv.aids)/(1-Prob.hiv.aids2$prob))
# Plotting
plot(prob~time, data=Prob.hiv.aids2, type="s", xlab="time since HIV infection (years)",
     ylab="probability to develop AIDS", xlim=c(0,13), ylim=c(0,0.8), xaxs="i", lwd=3)
lines(prob~time, data=Prob.hiv.aids, type="s", col="grey")
# add Kaplan-Meier
lines(survfit(Surv(aids.time.orig, pmax(aids.stat,siaids.stat))~1, data=aidssi2),
      fun="event", mark.time=FALSE, col="red")


##########################################################################
## Chapter 4
##########################################################################

##############################
## Section 4.2
##############################

## Separate analyses per event type
coxph(Surv(time, status==1) ~ ccr5, data = aidssi)
coxph(Surv(time, status==2) ~ ccr5, data = aidssi)


###############################
## Section 4.3
###############################

## 1. Same results in one analysis
##################################

## Creation of stacked data set (via the crprep function); creation of compound
## and type-specific covariables (without using expand.covs)
aidssi.w <- crprep("time", "cause", data=aidssi, trans=c("AIDS","SI"), cens="event-free",
                   id="patnr", keep="ccr5")
aidssi.w <- within(aidssi.w,{
    ccrSI <- ifelse(failcode=="SI"&ccr5=="WM",1,ifelse(is.na(ccr5),NA,0))
    ccrAIDS <- ifelse(failcode=="AIDS"&ccr5=="WM",1,ifelse(is.na(ccr5),NA,0))
    ccr.comb <- factor(ccrAIDS+2*ccrSI,labels=c("WW","WM.AIDS","WM.SI"))
})

## Select only rows that correspond to actual follow-up
aidssi.stack <- subset(aidssi.w, count==1)
## Some further code to make data correspond with the book; not necessary to perform analyses
aidssi.stack$Tstart <- aidssi.stack$weight.cens <- aidssi.stack$count <- NULL
aidssi.stack <- aidssi.stack[order(aidssi.stack$patnr),]
aidssi.stack$status <- ifelse(aidssi.stack$status==aidssi.stack$failcode,1,0)
names(aidssi.stack)[2] <- "time"

subset(aidssi.stack, patnr %in% c(14,3,15,8))[c(5,6,1,2,7,8,3,4),c(1:3,5,4,6:8)] # TABLE 4.3

## Combined analysis with interaction term
coxph(Surv(time, status) ~ ccr5 * strata(failcode),
                                            data = aidssi.stack)
## An alternative, but not recommended
coxph(Surv(time, status) ~ ccr5 * failcode + strata(failcode),
                                            data = aidssi.stack)

## Combined analysis with ccr.comb variable
fit.stacked <- coxph(Surv(time, status) ~ ccr.comb + strata(failcode), data = aidssi.stack)
fit.stacked
## A more detailed description is obtained using the summary function on the fit
summary(fit.stacked)
## Robust standard error
coxph(Surv(time, status) ~ ccr.comb + strata(failcode) + cluster(patnr), data = aidssi.stack)

## 2. Type-specific covariables
###############################

coxph(Surv(time, status) ~ ccrAIDS + ccrSI + strata(failcode), data = aidssi.stack)
## Different contrast, such that one term quantifies the additional effect
coxph(Surv(time, status) ~ ccr5 + ccrSI + strata(failcode), data = aidssi.stack)
coxph(Surv(time, status) ~ ccr5 + ccrAIDS + strata(failcode), data = aidssi.stack)

## 3. Effects equal over causes
###############################

coxph(Surv(time, status) ~ ccr5 + strata(failcode), data = aidssi.stack)
## Same result via
coxph(Surv(time, status!=0) ~ ccr5, data = aidssi)
## Robust standard error
coxph(Surv(time, status) ~ ccr5 + strata(failcode) + cluster(patnr), data = aidssi.stack)

## 4. Proportional baseline hazards
###################################

coxph(Surv(time, status) ~ ccrAIDS + ccrSI + failcode, data = aidssi.stack)
## Test for non-proportionality
cox.zph(coxph(Surv(time, status) ~ ccrAIDS + ccrSI + failcode, data = aidssi.stack))

## Proportional baseline hazards and effect ccr5 equal
coxph(Surv(time, status) ~ ccr5 + failcode, data = aidssi.stack)
## Section 4.4.3: effect failcode is also obtained from logistic regression
summary(glm(cause=="SI"~1, data=aidssi, subset=status>0&!is.na(ccr5), family=binomial))
## or directly from the data
event.table <- table(subset(aidssi, !is.na(ccr5))$cause)
qlogis(event.table["SI"]/(event.table["AIDS"]+event.table["SI"]))

##############################
## Section 4.5
##############################

## We first repeat the code that was shown above (as in section 3.7.3)
## Only run this if you need to recreate the aidssi2 data set
data(aidssi2)
aidssi2 <- aidssi2[,c(1,2,3,4,5,6,5,6,7:10)]
names(aidssi2)[c(7,8)] <- c("siaids.time","siaids.stat")
tmp.si.aids <- aidssi2$si.time < aidssi2$aids.time
aidssi2 <- within(aidssi2, {
    siaids.stat <- aids.stat*si.stat
    siaids.time <- pmax(aids.time,si.time)
    aids.stat[tmp.si.aids] <- 0
    si.stat[!tmp.si.aids] <- 0
})
aidssi2$aids.time.orig <- aidssi2$aids.time
aidssi2$si.time.orig <- aidssi2$si.time
aidssi2$aids.time[aidssi2$aids.stat==0] <- aidssi2$death.time[aidssi2$aids.stat==0]
aidssi2$si.time[aidssi2$si.stat==0] <- aidssi2$death.time[aidssi2$si.stat==0]
rm(tmp.si.aids)
trans.mst <- transMat(x = list(c(2,3), c(4,5), 4, 5, c()),
             names = c("HIV", "AIDS", "SI", "AIDS/SI", "death"))

## Create data set in stacked format, we include three (co)variables via the keep argument
aidssi.mst <- msprep(aidssi2, trans=trans.mst,
                     time=c(NA,"aids.time","si.time","siaids.time","death.time"),
                     status=c(NA,"aids.stat","si.stat","siaids.stat","death.stat"),
                     start=list(time=aidssi2$entry.time,state=rep(1,nrow(aidssi2))), id="patnr",
                     keep=c("ccr5","age.inf","aids.time"))
# create type-specific variables
aidssi.mst <- expand.covs(aidssi.mst, covs=c("ccr5","age.inf"), longnames=FALSE)

## separate analysis by end point
for(i in 1:6) print(coxph(Surv(Tstart,Tstop,status)~ccr5+I(age.inf/10),data=aidssi.mst,
                          subset=trans==i))

## 1. Combined analyses: assume effects to be equal
###################################################

## Combine ccr5 effect for transitions 2 and 3 and remove ccr5.2 from data
aidssi.mst$ccr5.23 <- aidssi.mst$ccr5.2+aidssi.mst$ccr5.3
aidssi.mst$ccr5.2 <-  NULL

## Likelihood ratio test whether age effect can be assumed equal for all transitions
## Fit model with all transition-specific covariables
fit.aidssi.all <- coxph(Surv(Tstart,Tstop,status) ~ strata(trans) + ccr5.23 + ccr5.1 + ccr5.4 +
                            ccr5.5 + ccr5.6 + I(age.inf.1/10)+ I(age.inf.2/10)+ I(age.inf.3/10)+
                                I(age.inf.4/10)+ I(age.inf.5/10) + I(age.inf.6/10), data=aidssi.mst)
## and model with equal age effects
fit.aidssi.mst <- coxph(Surv(Tstart,Tstop,status) ~ strata(trans) + ccr5.23 + ccr5.1 + ccr5.4 +
                            ccr5.5 + ccr5.6 + I(age.inf/10), data=aidssi.mst)
## Likelihood ratio test for from significant
anova(fit.aidssi.all,fit.aidssi.mst)

## Since we will use one overall age variable, we remove the type-specific variables
aidssi.mst$age.inf.1 <- aidssi.mst$age.inf.2 <- aidssi.mst$age.inf.3 <- aidssi.mst$age.inf.4 <- aidssi.mst$age.inf.5 <- aidssi.mst$age.inf.6 <- NULL
## Summary data set
summary(fit.aidssi.mst)

## Test effect on pathway HIV -> SI -> AIDS
fit.tmp <- coxph(Surv(Tstart,Tstop,status) ~ strata(trans) + ccr5.1 + ccr5.4 + ccr5.6 +
                     I(age.inf/10), data=aidssi.mst)
anova(fit.tmp,fit.aidssi.mst)

## 2. Proportional baseline hazards
###################################

## FIGURE 4.2: estimated cumulative baseline hazards
par(las=1)
plot(survfit(fit.aidssi.mst), fun="cumhaz", mark.time=FALSE, xlim=c(0,12), ylim=c(0,2),
     lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1), col=c(rep("black",3),"grey50","black","grey50"))
legend(8,1.8, c("1: HIV -> AIDS","2: HIV -> SI","3: AIDS -> AIDS/SI","4: AIDS -> death","5: SI -> AIDS/SI","6: AIDS/SI -> death"),
       lwd=c(3,3,2,2,2,1), lty=c(1,2,1,1,2,1),
       col=c(rep("black",2),"black","grey50","black", "grey50"), bty="n" )

## Fit the model and test for proportionality
aidssi.mst$trans <- factor(aidssi.mst$trans)
fit.aidssi.mst2 <- coxph(Surv(Tstart,Tstop,status) ~ relevel(trans,2) + ccr5.23 + ccr5.1 + ccr5.4 +
                             ccr5.5 + ccr5.6 + I(age.inf/10), data=aidssi.mst)
cox.zph(fit.aidssi.mst2)
## Use transition 1. HIV -> AIDS as reference transition
cox.zph(coxph(Surv(Tstart,Tstop,status) ~ trans + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 +
                  I(age.inf/10), data=aidssi.mst))

## Create variable tr.prop that defines the transitions that are assumed proportional
aidssi.mst$tr.AIDS <- ifelse(aidssi.mst$trans==1,1,2)
aidssi.mst$tr.prop <- factor(pmax(2,as.numeric(aidssi.mst$trans)),labels=c("2","3","4","5","6"))
## Fit the model
fit.aidssi.mst3 <- coxph(Surv(Tstart,Tstop,status) ~ strata(tr.AIDS) + tr.prop + ccr5.23 + ccr5.1 +
                             ccr5.4 + ccr5.5 + ccr5.6 + I(age.inf/10), data=aidssi.mst)
fit.aidssi.mst3

## 3. Dual role of intermediate states
######################################
coxph(Surv(Tstart,Tstop,status) ~ trans, data=aidssi.mst, subset=trans %in% c(1,5))


## 4. Beyond the Markov model: effect of transition time
#########################
aidssi.mst$aids.time <- ifelse(aidssi.mst$trans %in% c(4,6),aidssi.mst$Tstart,0)
coxph(Surv(Tstart,Tstop,status) ~ strata(tr.AIDS) + tr.prop + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 +
          ccr5.6 + I(age.inf/10) + aids.time, data=aidssi.mst)

## 5. Standard error
####################
coxph(Surv(Tstart,Tstop,status) ~ strata(trans) + ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6 +
          I(age.inf/10)+cluster(patnr), data=aidssi.mst)


#####################################################################################
## Chapter 5, Section 5.7.1
#####################################################################################

#######################################################
## From cause-specific/transition hazard to probability
#######################################################

## Competing risks
##################

## Basis: fitted proportional cause-specific hazards model
fit.stacked <- coxph(Surv(time, status) ~ ccr.comb +
                          strata(failcode), data = aidssi.stack)
## Create data frame with covariable value of interest and one row per event type
indiv.WW <- data.frame(strata=c("AIDS","SI"), ccr.comb="WW")
indiv.WM <- data.frame(strata=c("AIDS","SI"), ccr.comb=c("WM.AIDS","WM.SI"))
indiv.WW
indiv.WM
## or we can write
## indiv.WW <- subset(aidssi.stack, patnr==1)
## indiv.WW$strata <- factor(indiv.WW$failcode)
## indiv.WM <- subset(aidssi.stack, patnr==2)
## indiv.WM$strata <- factor(indiv.WM$failcode)

## Compute estimates of cause-specific cumulative incidence
## Steps two and three in mstate approach (see also Pages 139/140 of the book)
trans.SI <- trans.comprisk(2, c("event-free","AIDS","SI"))
Haz.WW <- msfit(fit.stacked, newdata=indiv.WW, trans=trans.SI)
Haz.WM <- msfit(fit.stacked, newdata=indiv.WM, trans=trans.SI)
AJprop.WW <- probtrans(Haz.WW, predt=0)
AJprop.WM <- probtrans(Haz.WM, predt=0)

## FIGURE 5.1
par(las=1)
plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"),
             weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2), xlim=c(0,13),
     ylim=c(0,1), xlab="time since HIV infection (years)", xaxs="i", fun="event")
lines(pstate3~time,data=AJprop.WW[[1]], type="s", lwd=2)
lines(pstate3~time,data=AJprop.WM[[1]], type="s", lwd=2, lty=2)
legend(1,0.55, c("WW","WM"), lwd=c(2,2), lty=c(1,2), bty="n", y.intersp=1.4)
lines(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5),
              subset(aidssi.wCCR,failcode=="AIDS"), weights=weight.cens), mark.time=FALSE,
      col="grey40", lty=c(1,2), xlim=c(0,13), ylim=c(0,0.48), xlab="", xaxs="i")
lines(1-pstate2~time,data=AJprop.WW[[1]], type="s", lwd=2)
lines(1-pstate2~time,data=AJprop.WM[[1]], type="s", lwd=2, lty=2)
legend(1,0.55, c("WW","WM"), lty=c(1,2), bty="n", y.intersp=1.4)
text(11,0.7, "AIDS")
text(11,0.1, "SI")

## Multi-state model
####################

## We first repeat the creation of the data set
aidssi.mst <- msprep(aidssi2, trans=trans.mst,
                     time=c(NA,"aids.time","si.time","siaids.time","death.time"),
                     status=c(NA,"aids.stat","si.stat","siaids.stat","death.stat"),
                     start=list(time=aidssi2$entry.time,state=rep(1,nrow(aidssi2))),
                     keep=c("ccr5","age.inf","aids.time"))
aidssi.mst <- expand.covs(aidssi.mst, covs=c("ccr5","age.inf"), longnames=FALSE)
aidssi.mst$ccr5.23 <- aidssi.mst$ccr5.2+aidssi.mst$ccr5.3
aidssi.mst$ccr5.2 <-  NULL
aidssi.mst$age.inf.1 <- aidssi.mst$age.inf.2 <- aidssi.mst$age.inf.3 <- aidssi.mst$age.inf.4 <- aidssi.mst$age.inf.5 <- aidssi.mst$age.inf.6 <- NULL
aidssi.mst$tr.AIDS <- ifelse(aidssi.mst$trans==1,1,2)
aidssi.mst$tr.prop <- factor(pmax(2,as.numeric(aidssi.mst$trans)),labels=c("2","3","4","5","6"))
## The model on which we want to base our predictions
fit.aidssi.mst3 <- coxph(Surv(Tstart,Tstop,status) ~ strata(tr.AIDS) + tr.prop + I(age.inf/10) +
                             ccr5.23 + ccr5.1 + ccr5.4 + ccr5.5 + ccr5.6, data=aidssi.mst)

## Estimate for individual with covariable combination age=25 and WW genotype
patWW25 <- data.frame(age.inf=rep(25,6), strata=c(1,2,2,2,2,2), tr.prop=factor(c(2,2,3,4,5,6)),
                      ccr5.23=rep(0,6), ccr5.1=rep(0,6), ccr5.4=rep(0,6), ccr5.5=rep(0,6),
                      ccr5.6=rep(0,6))
patWW25
trHaz.WW25 <- msfit(fit.aidssi.mst3, newdata=patWW25, trans=trans.mst)
prob.WW25 <- probtrans(trHaz.WW25, predt=2)
## Estimate for individual with covariable combination age=25 and WM genotype
patWM25 <- data.frame(age.inf=rep(25,6), strata=c(1,2,2,2,2,2), tr.prop=factor(c(2,2,3,4,5,6)),
                      ccr5.23=c(0,1,1,0,0,0), ccr5.1=c(1,rep(0,5)), ccr5.4=c(0,0,0,1,0,0),
                      ccr5.5=c(rep(0,4),1,0), ccr5.6=c(rep(0,5),1))
patWM25
trHaz.WM25 <- msfit(fit.aidssi.mst3, newdata=patWM25, trans=trans.mst)
prob.WM25 <- probtrans(trHaz.WM25, predt=2)
## Estimate for individual with covariable combination age=50 and WW genotype
patWW50 <- data.frame(age.inf=rep(50,6), strata=c(1,2,2,2,2,2), tr.prop=factor(c(2,2,3,4,5,6)),
                      ccr5.23=rep(0,6), ccr5.1=rep(0,6), ccr5.4=rep(0,6), ccr5.5=rep(0,6),
                      ccr5.6=rep(0,6))
trHaz.WW50 <- msfit(fit.aidssi.mst3, newdata=patWW50, trans=trans.mst)
prob.WW50 <- probtrans(trHaz.WW50, predt=2)
## Estimate for individual with covariable combination age=50 and WM genotype
patWM50 <- data.frame(age.inf=rep(50,6), strata=c(1,2,2,2,2,2), tr.prop=factor(c(2,2,3,4,5,6)),
                      ccr5.23=c(0,1,1,0,0,0), ccr5.1=c(1,rep(0,5)), ccr5.4=c(0,0,0,1,0,0),
                      ccr5.5=c(rep(0,4),1,0), ccr5.6=c(rep(0,5),1))
trHaz.WM50 <- msfit(fit.aidssi.mst3, newdata=patWM50, trans=trans.mst)
prob.WM50 <- probtrans(trHaz.WM50, predt=2)

## A warning message appears in all estimates:
## In probtrans(trHaz.WW25, predt = 2) :
##   Warning! Negative diagonal elements of (I+dA); the estimate may not be meaningful.
## This occurs if an increment in the Nelson-Aalen estimate is larger than 1.
## It typically occurs if the remaining number at risk is small and can usually be neglected

## FIGURE 5.3
plot(pstate5~time, data=prob.WW25[[1]], type="s", lwd=2, xlim=c(2,13), ylim=c(0,1),
     xlab="time since HIV infection (years)", ylab="death probability")
lines(pstate5~time, data=prob.WW25[[3]], type="s")
lines(pstate5~time, data=prob.WM25[[1]], type="s", lwd=2, lty=2)
lines(pstate5~time, data=prob.WM25[[3]], type="s", lty=2)
# Create bars for confidence intervals
tmp <- match(1, prob.WW25[[1]]$time>6)
with(prob.WW25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2))
tmp <- match(1, prob.WW25[[1]]$time>10)
with(prob.WW25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2))
tmp <- match(1, prob.WW25[[3]]$time>4)
with(prob.WW25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp]))
tmp <- match(1, prob.WW25[[3]]$time>8)
with(prob.WW25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp]))
tmp <- match(1, prob.WM25[[1]]$time>6)+10
with(prob.WM25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2, lty=2))
tmp <- match(1, prob.WM25[[1]]$time>10)+7
with(prob.WM25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2, lty=2))
tmp <- match(1, prob.WM25[[3]]$time>4)+5
with(prob.WM25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lty=2))
tmp <- match(1, prob.WM25[[3]]$time>8)+5
with(prob.WM25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lty=2))

## FIGURE 5.4
plot(pstate5~time, data=prob.WW25[[1]], type="s", lwd=2, xlim=c(2,13), ylim=c(0,1),
     xlab="time since HIV infection (years)", ylab="death probability")
lines(pstate5~time, data=prob.WW25[[3]], type="s")
lines(pstate5~time, data=prob.WW50[[1]], type="s", lwd=2, lty=2)
lines(pstate5~time, data=prob.WW50[[3]], type="s", lty=2)
tmp <- match(1, prob.WW25[[1]]$time>6)
with(prob.WW25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2))
tmp <- match(1, prob.WW25[[1]]$time>10)
with(prob.WW25[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2))
tmp <- match(1, prob.WW25[[3]]$time>4)
with(prob.WW25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp]))
tmp <- match(1, prob.WW25[[3]]$time>8)
with(prob.WW25[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp]))
tmp <- match(1, prob.WW50[[1]]$time>6)+10
with(prob.WW50[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2, lty=2))
tmp <- match(1, prob.WW50[[1]]$time>10)+7
with(prob.WW50[[1]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lwd=2, lty=2))
tmp <- match(1, prob.WW50[[3]]$time>4)+5
with(prob.WW50[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lty=2))
tmp <- match(1, prob.WW50[[3]]$time>8)+5
with(prob.WW50[[3]], segments(time[tmp], (pstate5*exp(qnorm(0.975)*se5/pstate5))[tmp], time[tmp],
                              (pstate5*exp(-qnorm(0.975)*se5/pstate5))[tmp], lty=2))


#######################################
## Regression on subdistribution hazard
#######################################

## Fit model based on one overall weight function
fit.FG <- coxph(Surv(Tstart,Tstop,status==failcode) ~ ccr.comb + strata(failcode), data=aidssi.w,
                weights=weight.cens)
fit.FG
## Using separate weight function per value of CCR5, differences are small
aidssi.wCCR <- within(aidssi.wCCR,{
     ccrAIDS <- ifelse(failcode=="AIDS"&ccr5=="WM", 1, ifelse(is.na(ccr5),NA,0) )
     ccrSI <- ifelse(failcode=="SI"&ccr5=="WM", 1, ifelse(is.na(ccr5),NA,0) )
     ccr.comb <- factor(ccrAIDS+2*ccrSI, labels=c("WW","WM.AIDS","WM.SI"))
 })
coxph(Surv(Tstart,Tstop,status==failcode)~ccr.comb+strata(failcode), data=aidssi.wCCR,
                weights=weight.cens)

## Create covariable combinations and compute predictions, based on fit.FG
indivs <- data.frame(ccr.comb=factor(c("WW","WW","WM.AIDS","WM.SI")),
                      failcode=c("AIDS","SI","AIDS","SI"))
pred.sdh <- survfit(fit.FG, newdata=indivs)

## FIGURE 5.5
plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"), weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2), xlim=c(0,13), ylim=c(0,1), xlab="time since HIV infection (years)", xaxs="i", fun="event")
lines(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="AIDS"), weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2))
legend(1,0.55,c("WW","WM"),lwd=c(2,2),lty=c(1,2),bty="n",y.intersp=1.4)
lines(pred.sdh[c(2,4)], mark.time=FALSE, fun="event", lty=1:2, lwd=2)
lines(pred.sdh[c(1,3)], mark.time=FALSE, lty=1:2, lwd=2)
text(11,0.7,"AIDS")
text(11,0.1,"SI")

## Test for non-proportionality
test.PH <- cox.zph(fit.FG, transform="identity")
test.PH
par(mfrow=c(1,2))
plot(test.PH)
par(mfrow=c(1,1))

## Fit proportional subdistribution hazards via cmprsk package, uses sandwich estimator of standard
## error
library(cmprsk)
with(aidssi, crr(time, status, as.numeric(ccr5)-1, failcode=1))
with(aidssi, crr(time, status, as.numeric(ccr5)-1, failcode=2))
## Effect estimate the same, standard error slightly different

## Test for proportionality, using sandwich-based estimator
# install.packages("crrSC")
library(crrSC)
with(aidssi, psh.test(time,status,as.numeric(ccr5)-1,D=1, tf=function(x) x))
## Since ccrSC requires the event of interest to have value 1, we create a temporary data set with
## SI event 1
tmp <- aidssi
tmp$status <- ifelse(tmp$status==2,1,tmp$status*2)
with(tmp, psh.test(time,status,as.numeric(ccr5)-1,D=1, tf=function(x) x))


##############################
## Proportional odds model
##############################

# install.packages("timereg")
library(timereg)
## SI as event of interest, one overall weight function
fit.Podds <- comp.risk(Event(time,status)~const(ccr5),
              data=subset(aidssi,!is.na(ccr5)), cause=2,
              times=subset(aidssi,!is.na(ccr5)&status==2)$time,  model="logistic"
                       )
coef(fit.Podds)
##  SI as event of interest, CCR5-specific weight function based on Cox model
fit.Podds.w <- comp.risk(Event(time,status)~const(ccr5),
              data=subset(aidssi,!is.na(ccr5)), cause=2,
              times=subset(aidssi,!is.na(ccr5)&status==2)$time,  model="logistic", cens.model="cox"
                         )
coef(fit.Podds.w)
## AIDS as event of interest
fit.Podds.AIDS <- comp.risk(Event(time,status)~const(ccr5),
              data=subset(aidssi,!is.na(ccr5)), cause=1,
              times=subset(aidssi,!is.na(ccr5)&status==1)$time,  model="logistic"
              )

newdata <- data.frame(ccr5=c(0,1))
pred.SI <- predict(fit.Podds, newdata=newdata)
pred.AIDS <- predict(fit.Podds.AIDS, newdata=newdata)

## FIGURE 5.7
plot(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5), subset(aidssi.wCCR,failcode=="SI"),
             weights=weight.cens), mark.time=FALSE, col="grey40", lty=c(1,2), xlim=c(0,13),
      ylim=c(0,1), xlab="time since HIV infection (years)", xaxs="i", fun="event")
lines(survfit(Surv(Tstart,Tstop,status==failcode)~strata(ccr5),
              subset(aidssi.wCCR,failcode=="AIDS"), weights=weight.cens), mark.time=FALSE,
      col="grey40", lty=c(1,2))
legend(1,0.55,c("WW","WM"),lwd=c(2,2),lty=c(1,2),bty="n",y.intersp=1.4)
lines(pred.SI$time,pred.SI$P1[1,],type="s",lwd=2)
lines(pred.SI$time,pred.SI$P1[2,],type="s",lty=2,lwd=2)
lines(pred.AIDS$time,1-pred.AIDS$P1[1,],type="s",lwd=2)
lines(pred.AIDS$time,1-pred.AIDS$P1[2,],type="s",lty=2,lwd=2)
text(11,0.7,"AIDS")
text(11,0.1,"SI")