################################################### ### chunk number 1: TFR0 ################################################### library(foreign) ## READ the data ## create a symlink to where the data are -- the data are in ## /data/commons, which is a good place to put data like this (easily ## reproducible IPUMS holds on to your extract definitions for at ## least a decade) system("ln -s /data/commons/carlm/ACS2009/ ACS2009") ## I compressed (gzipped) the data because that is what good ## responsible computer users do. To use the data we need to ## uncompress it. A GREAT place to uncompress the data is ## /72hours. If you use zcat as shown, the compressed copy will stay in ## /data/commons. The uncompressed copy will wind up in /72hours. It ## will be erased for you in a few days -- but you can get it back ## anytime you want. ##uncompress the data to /72hours system("zcat ACS2009/usa_00084.dta.gz > /72hours/acs09.dta") ## read the data into R. ## the "foreign" library has the code to read and process stata .dta files library(foreign) acs09<-read.dta(file="/72hours/acs09.dta") names(acs09) ## sum the person weight to see how many people the observations represent sum(acs09$perwt) ## 75.9 million women between age 15 and 50 ## IPUMS GOTCHA # 1 -- is.numeric(acs09$age) is.factor(acs09$age) ##levels(acs09$age) ## look at all these stupid levels -- which in ##this case have no observations. ## create a numeric age variable acs09$ageN<-as.numeric(acs09$age) -1 ## make sure it looks right table(acs09$ageN) table(as.character(acs09$age)) ## IPUMS GOTCHA #2 -- ## fertyr is the answer to the question: did respondent have birth in ## the last year? One would think that this was a yes/no question... but is.logical(acs09$fertyr) is.factor(acs09$fertyr) table(acs09$fertyr) ## The "N/A" here is NOT the same as NA.. acs09$fertyrL<-ifelse(acs09$fertyr=="Yes",TRUE,FALSE) table(acs09$fertyrL) ################################################### ### chunk number 2: numerator ################################################### ## The numerator should be a matrix of counts of births that occurred ## to women by age .. and how about citizenship status table(acs09$citizen) ## the N/A in this case means born in the US. ## let's get rid of the obsolete responses which were relevant back in ## the 19th Century ## A clever trick for getting rid of levels in a factor which do not ## have any observations: acs09$citizenF<-factor(as.character(acs09$citizen)) numer<-tapply(acs09$perwt*acs09$fertyrL, list(acs09$ageN,acs09$citizenF), sum) dim(numer) head(numer) ## A barplot would be nice right now barplot(t(numer),xlab="Age of woman",ylab="Births in 2009", col=(numercol<-1:length(levels(acs09$citizenF)))) legend(x="topright",fill=numercol,legend=levels(acs09$citizenF), cex=.6) ################################################### ### chunk number 3: denom ################################################### ## the denominator is easier -- All we want is a weighted count of the ## number of women of each age who were "at risk" of giving ## birth. We are neglecting mortality -- because dead women do not ## fill out census forms so all we need to do is count denom<-tapply(acs09$perwt, list(acs09$ageN,acs09$citizenF), sum) dim(denom) dim(numer) ASFR<-numer/denom matplot(ASFR,type='l',lty=1,lwd=2, xlab="Age of woman",ylab="ASFR in 2009", col=(numercol<-1:length(levels(acs09$citizenF)))) legend(x="topright",fill=numercol,legend=levels(acs09$citizenF), cex=.6) TFR<-apply(ASFR,2,sum); TFR