# Posted with permission of the code author:
# Beau Benjamin Bruce
# Author email: bbbruce@emory.edu
# 
# This code is available under GPL-2
# For license information, see
# http://cran.r-project.org/web/licenses/GPL-2

library(KMsurv)
data(burn)

#=====> Convert a data frame to a counting process version <=====#
#=====> Allows for time dependent variables to be introduced <=====#
df.cp = function(data,t.var,status.var,covars=setdiff(names(data),c(t.var,status.var))) {
  # data: data frame that represents the dataset
  # t.var: the column name of "data" that represents the survival time variable
  # status.var: the column name of "data" that represents the status variable
  # covars: list other covariates to retain
  
  # Returned object: a data frame that breaks each event down to a counting process
  
  # sorted times, append to 0
  t.sort <- c(0,sort(unlist(unique(data[t.var]))))
  
  # for each data point find times less than or equal to the obs' time
  t.list <- lapply(unlist(data[t.var]),function(x) t.sort[t.sort<=x])
  
  # create a list of datasets with covariates and all relevant start/stop times
  # use with to set the environment to the correct list item
  df.list <- lapply(seq_along(t.list),function(i) cbind(with(list(x=t.list[[i]]),
                     # start by removing one from end of x, stop by removing first of x
                     # include the status variable in the dataframe - helpful later                                        
                     data.frame(start=head(x,-1),stop=tail(x,-1),data[i,c(status.var,covars)]))))

  # do.call uses df.list as the argument for rbind
  df <- do.call(rbind,df.list)

  # create the correct status need last time for each
  # subject with status=1 to to be status=1 but all others status=0
  #
  # the lapply creates vectors 0,0,0,...,1 based on length of t.list
  # need to substract 2 because the lag takes one away, then need one for the 1 at end
  # do.call with c binds them together into a single vector
  # this is then multiplied by status to correct it
  keep.status <- do.call(c,lapply(t.list,function(x) c(rep(0,length(x)-2),1)))
  df[status.var] <- df[status.var] * keep.status
  df
}


#=====> Create the counting process data frame <=====#
burn.cp <- df.cp(burn,'T1','D1')
burn.cp <- within(burn.cp,{ T1Z1 <- log(stop)*Z1; }) 

#=====> Apply the Cox Proportional Hazards model <=====#
coxph(Surv(start,stop,D1) ~ Z1+Z2+Z3+T1Z1,data=burn.cp)