######################################################################## ## demonstration file for Proportional Hazard Week2. ## Demography 213 ################################################### ### chunk number 1: getstuff ################################################### #line 75 "welcome.Rnw" ## This is code from last week. It just reads a life table from an HMD ## file and adds a couple of useful columns. ### Get an lx curve from the Human Mortality Database Ltab<-read.table(file='/hdir/0/carlm/213/PropHazI/US1X5.lifeTable', skip=2,header=T, as.is=T) ## load the cleanHMD function source(file="/hdir/0/carlm/213/PropHazI/snorkHMD.r") ## run the cleanHMD to add the age bound columns ltab<-cleanHMD(Ltab) ## isolate a single year life table us05<-ltab[ltab$Year =="2005",] ################################################### ### chunk number 2: lxiCqdxb ################################################### #line 131 "welcome.Rnw" ## We'll give our function the catchy name lxicqdxb for lx inverse ## continuous quick and dirty with Xbeta. lxicqdxb<-function(Xbeta,LifeTable=us05){ ## This will generate quick and dirty lifetimes based on the ## provided lx values with hazards proportionally shifted for each ## value if Xbeta. Xbeta is the linear part of the proportional ## hazard model. The tricky bit is that each individual in the ## sample with a unique value of Xbeta (covariatesXparameters) has a ## unique hazard function -- the baseline hazard shifted ## proportionally per their very own personal Xbeta. if(missing(Xbeta)){stop("Xbeta is the whole point of this function")} ## If Xbeta is produced by %*% w then it will be a 1XN matrix rather ## than a vector. We could work with it either way but we need to ## pick one. Xbeta<-as.vector(Xbeta) ### just in case we draw a u very close to 1.. ## We'll add a zero onto the end to make sure everyone behaves Survival<-c(LifeTable$lx,0)/LifeTable$lx[1] ## We'll add a corresponding maximum age beyond which no one misbehaves ages<-c(LifeTable$age.lower,115) ## this is where ## the proportional shift happens -- NOTE that we are effectively ## raising lx to ## the power of the exp(Xbeta). To work out why, recall that the ## hazard is the negative slope of the log of lx. survivals<-outer(Survival,exp(Xbeta),"^") u<-runif(n=length(Xbeta)) res<-NULL ## we need to run approx() separately for each individual, luckily ## it's a pretty quick (and dirty) function so we can just loop it. ## notice that we are interpolating the inverse of the survival function for(i in 1:length(u)){ res<-c(res,approx(x=survivals[,i],y=ages,xout=u[i])$y) } return(res) } ## sanity check: if Xbeta=0 then it's all based on us05$lx ltimes.baseline<-lxicqdxb(Xbeta=rep(0,10000)) mean(ltimes.baseline) us05$ex[1] mean(ltimes.baseline[ltimes.baseline> 50]) - 50 us05$ex[us05$age.lower==50] mean(ltimes.baseline[ltimes.baseline> 90]) - 90 us05$ex[us05$age.lower==90] ################################################### ### chunk number 3: test_with_cox ################################################### #line 193 "welcome.Rnw" library(survival) ## N observations N<-2500 ## X is a matrix of covariates we'll just use standard normals for ## charm and angst, virtue will be a dummy/categorical variable. X<- data.frame(charm=rnorm(N), virtue=runif(N)<.5, angst=rnorm(N)) ## beta is a vector of parameters that determine the effect of ## the covariates on the hazard function Xbeta<-as.matrix(X)%*%(beta<-c(.1, -.2, .3)) names(beta)<-c("charm","virtue","angst") ## Just like last week, we'll create a data frame with a special ## dependent variable for coxph() Dset<-data.frame(X, depvar<-Surv(time=rep(0,N), time2=lxicqdxb(LifeTable=us05,Xbeta=Xbeta), event=rep(1,N)) ) mod1<-coxph(depvar~charm+virtue +angst,data=Dset) summary(mod1) ## did we recover beta or what? beta ################################################### ### chunk number 4: monotonic_fun ################################################### #line 231 "welcome.Rnw" ## Using the same Xbeta from last time ## the covariates on the hazard function Xbeta<-as.matrix(X)%*%(beta) ## we'll set up two coxph estimations -- the only difference is that ## in one, we will make some very drastic changes to the dependent variable. Time2.original<-lxicqdxb(LifeTable=us05,Xbeta=Xbeta) ##Time2.modified<- log(rank(Time2.original)^.75) +rank(Time2.original)/25 Time2.modified<- log((rank(Time2.original)+1)^.75) +trunc(Time2.original/25) plot(Time2.modified~Time2.original,main="Modified Simulated Lifetime") Dset<-data.frame(X, depvar.original<-Surv(time=rep(0,N), time2=Time2.original, event=rep(1,N)), depvar.modified<-Surv(time=rep(0,N), time2=Time2.modified, event=rep(1,N)) ) ## estimate the cox model with original and modified ## dependent variable summary(coxph(depvar.original~charm+virtue +angst,data=Dset)) summary(coxph(depvar.modified~charm+virtue +angst,data=Dset)) ############################################# ## QUESTION: WTF? ## Or, more scientifically, how does this result justify our use of the ## quick and dirty approach? ############################################# ################################################### ### chunk number 5: plotting_coxph ################################################### #line 276 "welcome.Rnw" ## the survfit function is used to calculate hazard, survival and ## other functions from hazard models. In it's simplest form it takes ## a coxph object (the output of a coxph()) estimation, and a matrix ## of covariates. Each row of the matrix represents and individual's ## covariates. ndat<-data.frame(rbind(c(0,0,0),c(9,9,9))) names(ndat)<-names(X) ## individual=FALSE means that each row of newdata represents a distinct ## individual entitled to her own survival curve. The TRUE option is used ## when there are time-varying covariates. mod1.fit<-survfit(mod1,newdata=ndat,se.fit=FALSE,individual=FALSE) ## the output of survfit can be fed to the plot() function plot(mod1.fit,col=c("blue","red"), main="Survival functions from baseline and a Herman Cain supporter") ################################################### ### chunk number 6: measurementerror ################################################### #line 311 "welcome.Rnw" Xbeta.original<-as.matrix(X)%*%(beta) error.size<-5 Xbeta.deviated<- Xbeta.original+ rnorm(n=N, mean=0, sd=error.size) summary(Xbeta.original) summary(Xbeta.deviated) plot(Xbeta.deviated~Xbeta.original,main="Xbeta vs Xbeta+random error", col=rainbow(100),pch=1) Time2.original<-lxicqdxb(LifeTable=us05,Xbeta=Xbeta.original) Time2.deviated<-lxicqdxb(LifeTable=us05,Xbeta=Xbeta.deviated) Dset<-data.frame(X, depvar.original<-Surv(time=rep(0,N), time2=Time2.original, event=rep(1,N)), depvar.deviated<-Surv(time=rep(0,N), time2=Time2.deviated, event=rep(1,N)) ) summary(coxph(depvar.original~charm+virtue +angst,data=Dset)) summary(coxph(depvar.deviated~charm+virtue +angst,data=Dset)) ############################################# ## QUESTION: Experiment enough to form an opinion as to how much error ## in Xbeta is too much. Note the Pr(>|z|) column and the se(coef) ## column as well as whether or not the coefficients are recovered. ## For the curious: try adding random error to just one variable. ############################################# ################################################### ### chunk number 7: uninformative.censoring ################################################### #line 363 "welcome.Rnw" ## start with the usual Xbeta Xbeta<-as.matrix(X) %*% (beta) ## the usual simulated lifetimes Time2.original<-lxicqdxb(LifeTable=us05,Xbeta=Xbeta) fraction.censored<-.25 Dset<-data.frame(X, depvar.original<-Surv(time=rep(0,N), time2=Time2.original, event=rep(1,N)), depvar.censored<-Surv(time=rep(0,N), time2=Time2.original, ## here is where the censoring happens ##event=ifelse(runif(N) -1 ## event=ifelse(X$charm> -1, 0,1)) ############################################# ################################################### ### chunk number 8: nonproportionaldata ################################################### #line 424 "welcome.Rnw" ## ##N<-1000 ## X is a matrix of covariates we'll just use standard normals X<- data.frame(charm=rnorm(N), virtue=rnorm(N), angst=rnorm(N)) Xbeta<-as.matrix(X)%*%(beta) ## Start with Xbeta based on the charm and virtue -- we'll make angst ## be nonproportional Xbeta.nonp<-as.matrix(X[,1:2])%*%(beta[1:2]) curve({ifelse(x < 0,6,x^2) }, from= -3, to=3, main="Effect of angst on survival") Xbeta.nonp<- Xbeta.nonp + ifelse(X$angst < 0,6,X$angst^2) Time2.original<-lxicqdxb(LifeTable=us05,Xbeta=Xbeta) Time2.nonp<-lxicqdxb(LifeTable=us05,Xbeta=Xbeta.nonp) ################################################### ### chunk number 9: nonpropXbetaPlot ################################################### #line 444 "welcome.Rnw" plot(Xbeta.nonp~Xbeta,main="Xbeta vs Xbeta with non-proportional angst") ################################################### ### chunk number 10: nonpropEstimation ################################################### #line 448 "welcome.Rnw" Dset<-data.frame(X, depvar.original<-Surv(time=rep(0,N), time2=Time2.original, event=rep(1,N)), depvar.nonp<-Surv(time=rep(0,N), time2=Time2.nonp, event=rep(1,N)) ) ## estimate the cox model with original and modified ## dependent variable summary(mod1<-coxph(depvar.original~charm+virtue +angst,data=Dset)) ### Hmm what do you think of that P value on angst? summary(mod1.np<-coxph(depvar.nonp~charm+virtue +angst,data=Dset)) ################################################### ### chunk number 11: nonpropotionaltiy_tests ################################################### #line 481 "welcome.Rnw" mod1.np.z<-cox.zph(mod1.np) ## the Schoenfeld residuals are calculated for each covariate -- they ## are the difference between the failure time and expected failure ## time calculated with the estimated coefficients ## here is a plot of the Schoenfeld residuals against time for "charm" ## -- charm *is* proportional so there should be no pattern plot(mod1.np.z[1,]) ################################################### ### chunk number 12: angst_nonprop_plot ################################################### #line 493 "welcome.Rnw" ## Now draw the plot for angst plot(mod1.np.z[3,]) ## print the output of cox.zph() for a numerical test of proportionality ## If the proportionality assumption holds, then the correlation ## between the Schoenfeld residuals and time should be zero. The chisq ## column is a whether or not the correlation *is* significantly ## different from zero. If it *IS* then we have a non-proportionality ## problem. print(mod1.np.z) ############################################# ## GOTCHA: The Schoenfeld residuals are not defined for censored ## observations. Consequently, a dataset that suffers from both a lot ## of censoring and non-proportionality... could mean trouble. ############################################# ################################################### ### chunk number 13: strata ################################################### #line 525 "welcome.Rnw" ## cooking up the same angst filled example data Xbeta<-as.matrix(X[,1:3])%*%(beta<-c(.1, -.2, .3)) ## Start with Xbeta based on the charm and virtue -- we'll make angst ## be non-proportional Xbeta.nonp<-as.matrix(X[,1:2])%*%(beta[1:2]) ##curve({ifelse(x < 0,6,x^2) }, from= -3, to=3, main="Effect of angst on survival") Xbeta.nonp<- Xbeta.nonp + ifelse(X$angst < 0,6,X$angst^2) ## create a new variable on which to stratify 0 is where angst changes ## it's tune X$broody<-X$angst < 0 Dset<-data.frame(X, depvar.nonp<-Surv(time=rep(0,N), time2=lxicqdxb(Xbeta.nonp), event=rep(1,N)) ) summary(mod2.np<-coxph(depvar.nonp~charm+virtue +angst +strata(broody), data=Dset)) ################################################### ### chunk number 14: stratplot ################################################### #line 553 "welcome.Rnw" ## Plot the survival function, revealing the effect of stratifying. plot(survfit(mod2.np),main="Stratified survival functions") ## In case you want to rerun earlier code, we'll remove broody from X X$broody<-NULL