# Tim Miller (tmiller@demog.berkeley.edu) # July 31, 2000 # # Population Module # A uni-sex population projection with no migration. # Generate population counts by own age, parent's age, and year (Out1) # and household counts by own age, head's age, and year (Out2). # Projection is based on time series of total fertility rate (In1); # life expectancy at birth (In2); and # age of household formation (economic independence, In3). # Attach Population Module attach ("/home/davis/lee1/TIM.DATA/Splus.Data.Dirs/pop.module.usa",pos=1) attach ("/home/davis/lee1/TIM.DATA/Splus.Data.Dirs/pop.module.functions",pos=2) attach ("/home/davis/lee1/tim.miller/Data.miracle.usa2l",pos=3) options(object.size=9e10) # Need large allocation for the object arrays. # Input In1: Annual time-series of the total fertility rate # Interpolate values of total fertility rate for 1800 to 1854 tfr.usa.1800.1854 <- approx(c(1800,1810,1820,1830,1840,1850,1855), (c(7.03,6.92,6.73,6.55,6.14,5.42,5.31)), xout=seq(1800,1854))$y # TFR from 1855 to 1998 tfr.usa.1855.1998 <- c(5.31,5.26,5.30,5.30,5.21,5.21,5.07,4.72,4.38,4.26,4.34,4.47,4.56,4.54,4.57,4.55,4.55,4.58,4.57,4.54,4.55,4.45,4.37,4.27,4.23,4.24,4.28,4.25,4.29,4.29,4.23,4.11,4.06,4.06,3.99,3.87,3.93,4.01,3.98,3.89,3.83,3.77,3.64,3.60,3.57,3.56,3.38,3.38,3.41,3.44,3.50,3.53,3.52,3.51,3.46,3.42,3.34,3.35,3.35,3.30,3.25,3.23,3.28,3.24,3.00,3.17,3.24,2.97,2.96,2.98,2.84,2.73,2.72,2.60,2.46,2.45,2.31,2.22,2.11,2.19,2.14,2.10,2.12,2.18,2.13,2.19,2.29,2.54,2.63,2.48,2.40,2.88,3.21,3.01,3.00,2.97,3.14,3.23,3.29,3.39,3.42,3.51,3.58,3.51,3.51,3.654,3.629,3.474,3.333,3.208,2.913,2.721,2.558,2.464,2.456,2.480,2.267,2.010,1.879,1.835,1.774,1.738,1.790,1.760,1.808,1.840,1.812,1.828,1.799,1.807,1.844,1.838,1.872,1.934,2.014,2.081,2.073,2.065,2.046,2.036,2.019,2.027,2.0325, 2.0595 ) # Assume a future trajectory for the TFR. # Here, we assume the tfr declines to 1.9. x <- seq(0,102) tfr.end <- 1.9 tfr.start <- 2.0595 tfr.usa.1998.2100 <- tfr.end + ((tfr.start - tfr.end)*(.95^x)) # Population Projections Program, Population Division, # U.S. Census Bureau (2000). # Population projections of the United States by age, sex, race, # hispanic origin, and nativity: 1999 to 2100. # www.census.gov/population/projections/nation/summary/np-t7-a.txt tfr.census.middle.1999.2100 <- approx (c(1999,2025,2050,2100), c(2.0475, 2.2068, 2.219, 2.1829), xout=seq(1999,2100))$y tfr.census.low.1999.2100 <- approx (c(1999,2025,2050,2100), c(2.0358, 1.8655, 1.7997, 1.6321), xout=seq(1999,2100))$y tfr.census.high.1999.2100 <- approx (c(1999,2025,2050,2100), c(2.0592, 2.5575, 2.6468, 2.7374), xout=seq(1999,2100))$y #The 2000 annual report of the Board of Trustees of the Federal # Old-Age and Survivors Insurance and Disability Insurance # Trust Funds. # www.ssa.gov/OACT/TR/TR00/triid.html#pgfld-55922 tfr.ssa.middle.1999.2100 <- approx (c(1999,2024), c(2.06,1.95), xout=seq(1999,2100), rule=2)$y tfr.ssa.low.1999.2100 <- approx (c(1999,2024), c(2.06,2.2), xout=seq(1999,2100), rule=2)$y tfr.ssa.high.1999.2100 <- approx (c(1999,2024), c(2.06,1.7), xout=seq(1999,2100), rule=2)$y # Assemble TFR series for 1800 to 2100 tfr.usa.1800.2100 <- c(tfr.usa.1800.1854, tfr.usa.1855.1998, tfr.ssa.middle.1999.2100) # Extend tfr for another 200 years tfr.usa.1800.2300 <- c(tfr.usa.1800.2100, rep(tfr.usa.1800.2100[(2100-1800+1)], 200)) # Input In2: Annual time-series of life-expectancy at birth # Interpolate values of e(0) for 1800 to 1900 based on # historical data from Massachusettes for 1850 to 1895 and # U.S. national data for 1900. e0.mass.1850.1895 <- c(38.3,38.7,41.7,42.5,44.1) e0.mass.1850.1895.year <- c(1850,1855,1880,1890,1895) e0.usa.1800.1899 <- approx( c(e0.mass.1850.1895.year,1900), c(e0.mass.1850.1895,47.3), xout=seq(1800,1899), rule=2 )$y e0.usa.1900.1997 <- c(473,491,515,505,476,487,487,476,511,521,500,526,535,525,542,545,517,509,391,547,541,608,596,572,597,590,567,604,568,571,597,611,621,633,611,617,585,600,635,637,629,648,662,633,652,659,667,668,672,680,682,684,686,688,696,696,697,695,696,699,697,702,700,699,702,702,701,705,702,705,709,711,712,714,720,726,729,733,735,739,737,741,745,746,747,747,747,749,749,751,754,755,758,755,757,758,761,765)/10 # Get future rates from a recent projection test <- get("result.improved.sy4.1998", "lee1/historical.usa/Data") e0.usa.1998.2298 <- test$e0.forecast rm (test) e0.usa.1800.2300 <- c( e0.usa.1800.1899, e0.usa.1900.1997, e0.usa.1998.2298, rep(e0.usa.1998.2298[301],2)) # Remove the 1918 flu epidemic, # replace with average of 1917 and 1919 e0s e0.usa.1800.2300.noflu <- e0.usa.1800.2300 e0.usa.1800.2300.noflu[(1918-1799)] <- (e0.usa.1800.2300[(1917-1799)] + e0.usa.1800.2300[(1919-1799)])/2 # derive kt.values unix.time(kt.usa.1800.2300.noflu <- sapply(e0.usa.1800.2300.noflu, kt.from.e0.taiwan)) #[1] 2578.74 20.96 2661.00 0.00 0.00 # Output Out1: The population projection # Derive nLx values. unix.time(Lx.usa.1800.2300 <- sapply(kt.usa.1800.2300.noflu, get.nLx.taiwan)) # Define maximum age and maximum run length based on Lx matrix max.age <- dim(Lx.usa.1800.2300)[1] max.run.length <- dim(Lx.usa.1800.2300)[2] # Define the probability of surviving from age n to n+1 ratio.Lx.usa.1800.2300 <- matrix(0,max.age,max.run.length) # For first year of life use 1L0/l0 ratio.Lx.usa.1800.2300[1,] <- Lx.usa.1800.2300[1,]/1 # Define L(n+1)/L(n) ratio.Lx.usa.1800.2300[2:110,] <- Lx.usa.1800.2300[2:110,]/ Lx.usa.1800.2300[1:109,] # Final age group ratio.Lx.usa.1800.2300[111,] <- Lx.usa.1800.2300[111,]/ (Lx.usa.1800.2300[110,]+Lx.usa.1800.2300[111,]) # Data check # Run for 200 years and check for population stability # Pop.array contains population counts by own age, age of # parent, and year. Note that age is a position marker: # the first element is age 0-1, the second is 1-2, ... , # the final element at position 111 represents ages 110+. pop.array <- array (0,dim=c(max.age,max.age,200)) pop.array[,,1] <- 1 # Initial population of 12,321 people. # Set constant survival and fertility rates using values from 1800 survival.rate <- ratio.Lx.usa.1800.2300[,1] fertility.rate <- c(rep(0,15), asfr.from.tfr(tfr.usa.1800.2300[1], asfr.taiwan.1951, asfr.taiwan.1991)/2, rep(0,41)) for ( time in 1: 199) { pop.matrix <- pop.array[,,time] pop.array[,,time+1] <- forecast.pop.mat(pop.matrix,survival.rate,fertility.rate)} # Use pop.matrix from 200th year as starting population # for the U.S. in 1800. Adjust to actual population size # of 5.3 million (Series A 1-5, Hist. Stats of the US) initial.pop.usa.1800 <- pop.array[,,200] * (5.3e6/sum( pop.array[,,200])) # Calculate population growth rate and age distribution # and then compare these to those implied by stable theory pop.size <- apply(pop.array[,,1:200],3,sum) pop.growth <- log(pop.size[2:200]/pop.size[1:199]) pop.agedist <- apply(pop.array[,,200],1,sum) # Calculate population growth rate and age distribution # implied by stable theory mean.age <- sum(fertility.rate*Lx.usa.1800.2300[,1]*seq(0,110))/ sum(fertility.rate*Lx.usa.1800.2300[,1]) # 29.16425 r <- log(sum(fertility.rate*Lx.usa.1800.2300[,1]))/mean.age # 0.02542424 pop.growth[199] #0.02547743 # The growth rates match to 4 decimal places. # Now compare population age distibutions plot (pop.agedist/sum(pop.agedist)) lines (Lx.usa.1800.2300[,1]*exp(-r*seq(0,110))/ sum(Lx.usa.1800.2300[,1]*exp(-r*seq(0,110)))) # These are identical. # Projection the population from 1800 to 2300 # Set run.length at 501 years (1800 to 2300) run.length <- 501 pop.array <- array (0,dim=c(max.age,max.age,run.length)) pop.array[,,1] <- initial.pop.usa.1800 # initial population # set survival rate and fertility rate from 1800 for ( time in 1: (run.length-1) ) { survival.rate <- ratio.Lx.usa.1800.2300[,time] fertility.rate <- c(rep(0,15), asfr.from.tfr(tfr.usa.1800.2300[time], asfr.taiwan.1951, asfr.taiwan.1991)/2, rep(0,41)) pop.matrix <- pop.array[,,time] pop.array[,,time+1] <- forecast.pop.mat(pop.matrix,survival.rate,fertility.rate)} # Calculate population size, growth rate and age distribution pop.agedist <- apply(pop.array,c(1,3),sum) pop.size <- apply(pop.agedist,2,sum) pop.growth <- log(pop.size[2:run.length]/pop.size[1:(run.length-1)]) plot (seq(1800,2000),pop.size[1:201]/1e6, type ="l", main = "Population for US (no immigration)", xlab="year", ylab="millions of people") stampp(.7) plot (seq(1800,2000),pop.growth[1:201]*100, type ="l", main = "Population growth for US (no immigration)", xlab="year", ylab="percent") stampp(.7) # Output Out2: Assemble population into households # Population counts by own age and age of household head # Input In3: Set age of economic independence (household formation) age.independence <- rep(21,run.length) household.array <- array (0,dim=c(max.age,max.age,run.length)) for (time in 1:run.length){ household.array[,,time] <- form.households(pop.array[,,time],age.independence[time])} # Total number of household heads by own age and time head.matrix <- apply(household.array,3,diag) # Total number of people in households with heads age x at time t hhsize.matrix <- apply(household.array,c(2,3),sum)