################################################### ### chunk number 1: gethmd ################################################### ### Get an lx curve from the Human Mortality Database Ltab<-read.table(file='/hdir/0/carlm/213/PropHazI/US1X5.lifeTable', skip=2,header=T, as.is=T) ## not an ideal format for our purposes -- that age ranges are ## particularly pernicious head(Ltab,n=24) ### There's some code in this file that adds two columns age.lower and ### age.upper source(file="/hdir/0/carlm/213/PropHazI/snorkHMD.r") ltab<-cleanHMD(Ltab) ## ltab still has gobs of data on lots of years sort(unique(ltab$Year)) ## isolate a single year life table us05<-ltab[ltab$Year =="2005",] names(ltab) ## add a width column us05$width<-us05$age.upper - us05$age.lower +1 ################################################### ### chunk number 2: normbounce ################################################### rvnorm<- rnorm(10000) plot(sort(rvnorm),(1:length(rvnorm))/length(rvnorm),type='l',lty=1, main="Mapping randomly selected percentiles onto values\nusing the inverse CDF") draw.bands<-function(prob0=.05,prob1=.1){ ### draws gray bands that illustrate how a range of percentile ### values is projected onto a range of realized values from the ### underlying distribution polygon(x=c(-4,qnorm(prob0),qnorm(prob0),qnorm(prob1),qnorm(prob1),-4), y=c(prob0,prob0,-1,-1,prob1,prob1), density=15,col='gray',angle=dnorm(mean(prob0,prob1))*180) } draw.bands(.89,.99) draw.bands(.45,.55) ################################################### ### chunk number 3: randomExponentials ################################################### ##pick a "rate" parameter lambda<- .05 ## insert random uniforms into inverse CDF ranexp<- -log(runif(n=1000))/lambda ### the mean of the exponential(lambda) is 1/lambda mean(ranexp) 1/lambda ## plot result plot(sort(ranexp)) ## overlay red line of empirical truth by using R's inbuilt rexp() ## function for generating random exponentials lines(sort(rexp(n=1000,rate=lambda)),col='red',lwd=3) ################################################### ### chunk number 4: qqplot ################################################### qqplot(ranexp,rexp(n=1000,rate=lambda)) ################################################### ### chunk number 5: lxAsCDF ################################################### plot(us05$age.lower,(cdf<-1-us05$lx/us05$lx[1]),type='s') ### Generating a single age at death value from the distribution ###implied by lx #### (1)generate a random uniform to bounce off of 1-lx/lx[1] u<- runif(n=1) u<-.654 ## to make a better picture. ## Graphically, we want to "bounce" a horizontal ray at the level of ## "u" off of the CDF at 90 degrees. ## mathematically this translates to finding the highest age category ## X for which 1-lx[X]/lx[1] < u and then adding 1. LifeTime<- us05$age.lower[sum(cdf < u)+1] LifeTime ## Graphing ... arrows(x0=0, x1=LifeTime,, y1=u,y0=u,col='blue',lwd=3) arrows(x0=LifeTime, x1=LifeTime, y1=0,y0=u,col='blue',lwd=3) ################################################### ### chunk number 6: lives10K ################################################### ### (1) generate 100,000 ages at death; and initialize a variable to ### (1) hold all those numbers N<-100000 Us<-runif(n=N) lives<-cut(Us, breaks=c(1-(us05$lx/us05$lx[1]),1), labels=us05$age.lower) ## calculate the lx implied by the lives we just generated ## so that we might compare it to us05$lx lx.hat<- c(N,N-cumsum(table(lives))) #### compare the lx.hat that we just generated with the lx values that #### are supposed to define the distribution of lives cbind(us05$lx,lx.hat) ## clean up and plot lx.hat<-lx.hat[-25] ## the us05 life table doesn't have the last row names(lx.hat)<-us05$age.lower plot( lx.hat[1:24]~names(lx.hat)[1:24], type='s') lines(us05$age.lower, us05$lx, type='s',col='blue',lty=2,lwd=2) title("us05$lx/10 and simulated lx") ################################################### ### chunk number 7: hazards ################################################### Nages<-length(us05$Age) rise<- log(us05$lx[-1]) - log(us05$lx[-Nages]) run<- us05$width[-Nages] hazards <- -rise/run dotchart(hazards,labels=us05$Age,main="Hazard rate by age category") ################################################### ### chunk number 8: lxCinverse ################################################### lxCinverse<-function(u,LifeTable=us05){ ## This converts random uniforms into age at death values ## according to the lx in LifeTable ## u is a presumably random uniform -- but in needs to be between 0 ## and 1 stopifnot(length(u)>0) ## survival<-with(LifeTable,lx/lx[1]) ## First the discrete part: find the age category the u implies ## based on the step function representation of lx if (length(u)==1){ ## if x is a single scalar then so is ac.index. ac.index<-sum( u <= survival ) }else{ ## if u is a vector then we need to compare each value of u ## to each value of cdf which is trickier. ac.index <- apply( outer (u,survival, FUN="<="),MARGIN=1,FUN=sum) } ## survivors to the lower age bound of the category in which x lives age.atleast<-LifeTable$age.lower[ac.index] ## hazard rate to which survivors shall be exposed until next age category Lambda<-with(LifeTable, {rise<-(log(lx[-1])-log(lx[-length(lx)])) run<- age.lower[-1]-age.lower[-length(lx)] -rise/run}) ## Unlikely to matter but... Lambda<-c(Lambda,1) ## a very large u could land in 110+ ## select the lambda(s) that apply to each u lambda<-Lambda[ac.index] ## take the u that's left over after subtracting the portion of ## log(u) that was required to get to the lower bound of the age ## category, and apply the inverse CDF trick on the remaining bit. ## if u= zero we can't log it. uLeftOver<-ifelse(u>0, log(u) - log(survival[ac.index]),0) extraLife<- uLeftOver/-lambda return(age.atleast +extraLife) } ## generate some age-at-deaths for random us. They ought to be between ## 0 and 110 lxCinverse(runif(5)) ################################################### ### chunk number 9: plotlxCinverse ################################################### ## with() temporarily makes the columns of a data.frame act like ## vectors in the work space with(us05,plot(age.lower~I(lx/lx[1]),type='s')) ## curve() can plot the values of function curve(lxCinverse,from=0, to=1,lwd=2,add=TRUE,col='purple') title(main="Survival function: discrete and continuous from lx values") ################################################### ### chunk number 10: approx ################################################### #To take care of the open interval, we #assume that no one lives past 124 survival<-c(with(us05,lx/lx[1]),0) survival.age<-c(us05$age.lower,110) ## the approx() function performs linear interpolation. It finds y ## values associated with what you give it as "xout" based on ## ## interpolation between the y and x values that you give it lxCinverse.cheap<-approx(y=survival.age, x=survival, xout=seq(0,1,length=1000)) ## the output of approx is a list with components x and y names(lxCinverse.cheap) ## lots of graphics functions will take a list with x and y components plot(y=survival.age,x=survival,col='red',type='p') curve(lxCinverse,from=0, to=1,n=200,lwd=2,add=TRUE,col='purple') lines(lxCinverse.cheap,col='red',lwd=1) ################################################### ### Assignment Part I ################################################### ## Run a Cox Proportional Hazard regression on a data set consisting ## of two slightly different randomly generated lifetimes. (don't ## panic below are some pretty good instructions on how to do ## this). Then answer the question below. ## STEP BY STEP INSTRUCTIONS: ## 1) generate two sets of lifetimes with a slight difference lives0<- lxCinverse(u=runif(n=10000),LifeTable=us05) # all the # default values ## ## a slightly modified life table us05.vegetables<-us05 us05.vegetables$lx<-us05.vegetables$lx^(exp(-.25)) lives1<- lxCinverse(u=runif(n=10000),LifeTable=us05.vegetables) ## ## 2) create a data.frame consisting of the two vectors of lives and an ## ## indicator variable, "veggies" that tells which vector the lifetime ## ## is from slightly modified set of lifetimes Dset<-data.frame(lives=c(lives0,lives1), veggies=c(rep(0,length(lives0)), rep(1,length(lives1)))) ## 3) load the survival library library(survival) ### 4) create a special type of variable that is used for survival analysis Dset$depvar<-Surv(time=rep(0,dim(Dset)[1]), time2=Dset$lives, event=rep(1,dim(Dset)[1])) ## 5) run a cox proportional hazard regression cox.res<-coxph(depvar ~ veggies,data=Dset) ## ## ## look at the results: ## summary(cox.res) ## ######################################################################## ## QUESTION: scratch your head, and try to explain what this all means and why ## it might be interesting. HINT: look at the coefficient value. ######################################################################## ######################################################################## ## Assignment PART II ## Run the same experiment as you did in Part I -- but using the quick ## and dirty method of generating lifetimes. Note the huge difference in ## the results. If necessary, see the frustration management section ## below. ######################################################################## ################################################### ### chunk number 12: frustrationManagement eval=FALSE ################################################### ## ## ### Frustration management ## ## 1) generate two sets of lifetimes with a slight difference ## survival<-c(with(us05,lx/lx[1]),0) ## survival.age<-c(us05$age.lower,124) ## survival.vegetables<-survival^exp(-.25) ## lives0<-approx(y=survival.age, ## x=survival, ## xout=runif(n=10000))$y ## lives1<-approx(y=survival.age, ## x=survival.vegetables, ## xout=runif(n=10000))$y ## ## ## ## The rest is the same as for part I ## ## 2) create a data.frame consisting of the two vectors of lives and an ## ## indicator variable, "veggies" that tells which vector the lifetime ## ## is from slightly modified set of lifetimes ## ## Dset<-data.frame(lives=c(lives0,lives1), ## veggies=c(rep(0,length(lives0)), ## rep(1,length(lives1)))) ## ## ## ### 3) load the survival library ## library(survival) ## ## ### 4) create a special type of variable that is used for survival analysis ## Dset$depvar<-Surv(time=rep(0,dim(Dset)[1]), ## time2=Dset$lives, ## event=rep(1,dim(Dset)[1])) ## ## ## ## 5) run a cox proportional hazard regression ## cox.res<-coxph(depvar ~ veggies,data=Dset) ## ## ## look at the results: ## summary(cox.res)