# Posted with permission of the code author: # Beau Benjamin Bruce # Author email: bbbruce@emory.edu # # This code is available under GPL-2 # For license information, see # http://cran.r-project.org/web/licenses/GPL-2 library(KMsurv) data(burn) #=====> Convert a data frame to a counting process version <=====# #=====> Allows for time dependent variables to be introduced <=====# df.cp = function(data,t.var,status.var,covars=setdiff(names(data),c(t.var,status.var))) { # data: data frame that represents the dataset # t.var: the column name of "data" that represents the survival time variable # status.var: the column name of "data" that represents the status variable # covars: list other covariates to retain # Returned object: a data frame that breaks each event down to a counting process # sorted times, append to 0 t.sort <- c(0,sort(unlist(unique(data[t.var])))) # for each data point find times less than or equal to the obs' time t.list <- lapply(unlist(data[t.var]),function(x) t.sort[t.sort<=x]) # create a list of datasets with covariates and all relevant start/stop times # use with to set the environment to the correct list item df.list <- lapply(seq_along(t.list),function(i) cbind(with(list(x=t.list[[i]]), # start by removing one from end of x, stop by removing first of x # include the status variable in the dataframe - helpful later data.frame(start=head(x,-1),stop=tail(x,-1),data[i,c(status.var,covars)])))) # do.call uses df.list as the argument for rbind df <- do.call(rbind,df.list) # create the correct status need last time for each # subject with status=1 to to be status=1 but all others status=0 # # the lapply creates vectors 0,0,0,...,1 based on length of t.list # need to substract 2 because the lag takes one away, then need one for the 1 at end # do.call with c binds them together into a single vector # this is then multiplied by status to correct it keep.status <- do.call(c,lapply(t.list,function(x) c(rep(0,length(x)-2),1))) df[status.var] <- df[status.var] * keep.status df } #=====> Create the counting process data frame <=====# burn.cp <- df.cp(burn,'T1','D1') burn.cp <- within(burn.cp,{ T1Z1 <- log(stop)*Z1; }) #=====> Apply the Cox Proportional Hazards model <=====# coxph(Surv(start,stop,D1) ~ Z1+Z2+Z3+T1Z1,data=burn.cp)