/*******************
* garba90srm.do
* calculate sex ratio for legal marriage ages w/in a prefecture using specified age endpoint
* version for coale.demog.berkeley.edu
* thoughts:
*   we are treating prefectures as closed marriage markets
*   maybe treat beijing, shanghai, tianjin, and other major cities separately, otherwise SR is too high
*   should look at traditional 95th percentile first marriage ages from longitudinal data to make good bounds
*
* v3: now does srm of 20-30 for both sexes and fixed a typo
* v2: updated to harmonize names, added figure B
* unix path "/data/nta0/avi/data/china/china1990/ipums"; file "ebenstei_econ_berkeley_edu_012.dta"
* home path "c:/users/iisan7/desktop/chinapref/1990"
* datasets: input cnbj; output marst, `x'_srm, agesrm
*******************/

* set mem 4g
global path "/data/nta0/avi/data/china/china1990/ipums"
global master "ebenstei_econ_berkeley_edu_012.dta"   /* put name of master dataset here */
/* NOTE(review): the header above describes a v3, but the log files are still stamped with
   version 2 -- confirm whether this should be bumped */
global ver "2"

/* main log is (re)started here; the results log is rebuilt from scratch by the appends below */
capture log close
log using "$path/garba90srm_v$ver.log.txt", text replace
capture erase "$path/garba90srm_resultsv$ver.log.txt"
set linesize 192
set more off

/* create a dataset with prefecture-level population by age, sex, ever married status */
use "$path/$master", clear        /* this dataset has 1982 and 1990 values together */
keep if gq==10 & year==1990       /* keep family households in 1990 only */
tab mgcause [fw=wtper]            /* not much migration */
collapse (sum) wtper, by(year provcn prefcn age sex marst) fast
table marst, contents(sum wtper) format(%10,0f)   /* most are single or married */
list if marst==9                  /* ones where status is unknown are also age unknown */
gen evmar=(inlist(marst,2,3,4))   /* ever married = marst codes 2, 3 or 4 */
replace evmar=. if marst==9       /* unknown status -> missing */
save "$path/garba90marst.dta", replace

collapse (sum) wtper, by(year provcn prefcn age sex) fast   /* generate popn by province*prefecture*age*sex */

/* age population backwards 10 years to 1980 and forwards 10 years to 2000. */
* qui do "$path/../fertility.do" /* merges based on exact age of females; Sample Survey of Population Changes 2004 */
* qui do "$path/../mortality.do" /* merges based on year and age group; Banister & Hill 2004 */

/*******************************************************************
* _srm_ratio: private helper for srm
* syntax: _srm_ratio newvar mode mlo flo mspan fspan
*   newvar  name of the sex-ratio variable to create
*   mode    "years"  -> eligible ages are [lo, lo+span]
*           "oldest" -> eligible ages are [lo, span]
*   mlo flo minimum eligible age for males / females
*   mspan fspan  span (or oldest age) for males / females
* creates newvar = 100 * (sum of wtper over eligible males)
*                      / (sum of wtper over eligible females)
* within each year x prefcn cell, repeated on every row of the cell.
* newvar is missing when a cell has no eligible males or no eligible females.
* leaves the data sorted by year prefcn.
*******************************************************************/
capture program drop _srm_ratio
program define _srm_ratio
    args newvar mode mlo flo mspan fspan

    if "`mode'"=="years" {
        local mhi = `mlo' + `mspan'
        local fhi = `flo' + `fspan'
    }
    else {
        local mhi = `mspan'
        local fhi = `fspan'
    }

    /* temporary variables are dropped automatically when the program exits */
    tempvar inm inf summ sumf totm totf
    gen `inm' = (sex==1 & age>=`mlo' & age<=`mhi')
    gen `inf' = (sex==2 & age>=`flo' & age<=`fhi')

    /* the sums are only filled in on eligible rows ... */
    egen `summ' = sum(wtper) if `inm'==1, by(year prefcn)   /* pref GB code (4-digit) includes prov+pref code */
    egen `sumf' = sum(wtper) if `inf'==1, by(year prefcn)
    /* ... so take the max within the cell to copy them onto every row */
    egen `totm' = max(`summ'), by(year prefcn)
    egen `totf' = max(`sumf'), by(year prefcn)

    by year prefcn, sort: gen `newvar' = (`totm'/`totf')*100
end

/*****************************************
* syntax:                                *
*   srm [oldest] [years] X Y             *
* oldest is max age in marriage market   *
* years is # years spent in marriage mkt *
* X is for males Y is for females        *
*                                        *
* creates srm50 srm70r srm70u srm80      *
* srm20 (males per 100 females) by       *
* year x prefcn                          *
*****************************************/
capture program drop srm
program define srm
    args mode mspan fspan

    /* fail early with a readable message instead of a "variable not found" deep inside */
    if !inlist("`mode'","oldest","years") {
        display as error "srm: first argument must be oldest or years"
        exit 198
    }
    confirm number `mspan'
    confirm number `fspan'

    /* 1950 marriage law */
    _srm_ratio srm50 `mode' 20 18 `mspan' `fspan'

    /* wanxishao era 1970-1979 */
    /* eventually keep just the right measure for each prefecture */
    /* NOTE(review): here the female minimum ages (25 rural / 27 urban) are higher than the
       male ones (23 rural / 25 urban), the reverse of the 1950 and 1980 rules in this
       program -- confirm the sexes are not swapped */
    _srm_ratio srm70r `mode' 23 25 `mspan' `fspan'
    _srm_ratio srm70u `mode' 25 27 `mspan' `fspan'

    /* 1980 marriage law */
    _srm_ratio srm80 `mode' 22 20 `mspan' `fspan'

    /* both starting age 20 */
    /* previously only defined for "years", which made "srm oldest ..." stop with an error here */
    _srm_ratio srm20 `mode' 20 20 `mspan' `fspan'
end

srm years 10 10
save "$path/garba90agesrm.dta", replace

/* create a dataset with just srm and prefcn for GIS map */
preserve
collapse (mean) srm*, by(prefcn)
tabstat srm*, stats(mean) by(prefcn)   /* print srm for all prefectures */
forvalues x=4/6 {   /* put into categories based on quantiles */
    xtile srm20xt`x'=srm20,nq(`x')
    xtile srm80xt`x'=srm80,nq(`x')
}

/* switch to the results log for the tables worth keeping, then back to the main log */
log close
log using "$path/garba90srm_resultsv$ver.log.txt", text append
forvalues x=4/6 {   /* print quintiles of srm */
    table srm20xt`x', contents(mean srm20)
    table srm80xt`x', contents(mean srm80)
}
log close
log using "$path/garba90srm_v$ver.log.txt", text append

sort prefcn   /* sorted on the merge key used by the Fig A section */
save "$path/garba90srm.dta", replace
restore

/* Table 1. table of SR for each province by age group by year */
use "$path/garba90agesrm.dta", clear
collapse (sum) wtper, by(year provcn age sex)
reshape wide wtper, i(year provcn age)j(sex)
gen agecat=.
/* bin single years of age into the Table 1 categories */
/* NOTE(review): the open-ended top bin (age>=50) also catches Stata missing values and any
   large "unknown age" code, if the data has one -- confirm */
replace agecat=0 if age<=14
replace agecat=1 if age>=15 & age<=19
replace agecat=2 if age>=20 & age<=24
replace agecat=3 if age>=25 & age<=29
replace agecat=4 if age>=30 & age<=34
replace agecat=5 if age>=35 & age<=39
replace agecat=6 if age>=40 & age<=44
replace agecat=7 if age>=45 & age<=49
replace agecat=8 if age>=50

/* wtper1 / wtper2 are the sex==1 / sex==2 populations from the preceding reshape */
egen males=sum(wtper1), by(year provcn agecat)
egen females=sum(wtper2), by(year provcn agecat)
gen agecatsr=males/females   /* think about re-doing for counts of surplus males/females */
/* agecatsr is constant within a cell, so the mean just keeps one row per cell */
collapse (mean) agecatsr, by(year provcn agecat)
lab def agecat 0 "0-14" 1 "15-19" 2 "20-24" 3 "25-29" 4 "30-34" 5 "35-39" 6 "40-44" 7 "45-49" 8 "50+"
label values agecat agecat

log close
log using "$path/garba90srm_resultsv$ver.log.txt", text append
table provcn year agecat, contents(mean agecatsr)

/* one line per province code 11 / 12 / 31, labelled Beijing / Tianjin / Shanghai in the legend */
twoway (line agecatsr agecat if provcn==11) ///
       (line agecatsr agecat if provcn==12) ///
       (line agecatsr agecat if provcn==31, ///
            ytitle("Ratio of Males to Females" " ") ///
            xtitle(" " "Age Category") ///
            xlabel(0 "0-14" 1 "15-19" 2 "20-24" 3 "25-29" 4 "30-34" 5 "35-39" 6 "40-44" 7 "45-49" 8 "50+") ///
            legend(order(1 "Beijing" 2 "Tianjin" 3 "Shanghai") ring(0) pos(7) col(3)) ///
            scheme(s2mono)), ///
       saving("$path/tab1.gph", replace)   /* great graph, have to find a use for it */
/* export next to the .gph under $path, like every other output of this do-file
   (previously this went to whatever the current working directory was) */
graph export "$path/tab1.eps", replace

log close
log using "$path/garba90srm_v$ver.log.txt", text append

/* Table 2. calculate surplus males at each age by decade */
use "$path/garba90agesrm.dta", clear
drop srm*
replace age=85 if age>=85   /* top-code so everything 85 and over lands in the last 5-year bin */
gen agecat=.
/* 5-year age bins: agecat a covers ages 5a .. 5a+4 (age was top-coded at 85 just above) */
forvalues a=0/17 {
    replace agecat=`a' if age>=5*`a' & age<=(5*`a')+4
}
collapse (sum) wtper, by(year agecat sex)
reshape wide wtper, i(year agecat)j(sex)
lab def agecat 0 "0-4" 1 "5-9" 2 "10-14" 3 "15-19" 4 "20-24" 5 "25-29" 6 "30-34" 7 "35-39" 8 "40-44" 9 "45-49" 10 "50-54" 11 "55-59" 12 "60-64" 13 "65-69" 14 "70-74" 15 "75-79" 16 "80-84" 17 "85+"
label values agecat agecat
gen sr=wtper1/wtper2              /* show where sex ratio is above/below 1 */
gen sur=(wtper1-wtper2)/1000      /* show counts of surplus men by age in thousands */

/* defunct bar graph -- too jaggy without putting into bins; if in bins, table is as good as a graph.
gen def=sur if sur<0
replace sur=. if sur<0
twoway (bar sur agecat if year==1990, sort) (bar def agecat if year==1990, sort)
*/

log close
log using "$path/garba90srm_resultsv$ver.log.txt", text append
table agecat year, contents(mean sur)   /* look at the results -- doesn't look like cohorts advanced properly 1982-1990 */
log close
log using "$path/garba90srm_v$ver.log.txt", text append

/* Fig A. calculate percent unmarried at each age by quantile of srm */
use "$path/garba90marst.dta", clear
replace age=65 if age>=65   /* avoid problem of low counts above age 65 */
/* evmar is missing where marital status is unknown; reshape wide does not accept missing
   values in its j() variable, so those rows are left out of the Fig A counts */
drop if missing(evmar)
collapse (sum) wtper, by(prefcn age sex evmar)
reshape wide wtper,i(prefcn age evmar)j(sex)
reshape wide wtper1 wtper2,i(prefcn age)j(evmar)
/* after the two reshapes: wtper<sex><evmar>, sex 1/2, evmar 0 = never married, 1 = ever married */
egen totpop=rowtotal(wtper10 wtper11 wtper20 wtper21)
egen totnom=rowtotal(wtper10 wtper20)
egen totmar=rowtotal(wtper11 wtper21)
egen totpopm=rowtotal(wtper10 wtper11)
egen totpopf=rowtotal(wtper20 wtper21)
ren wtper10 totnomm
ren wtper20 totnomf
ren wtper11 totmarm
ren wtper21 totmarf
egen cumpop=sum(totpop) if age>=15, by(prefcn)
/* running sum over age within prefecture; age is named as the sort key so the result
   does not depend on how ties in prefcn happen to be ordered */
bysort prefcn (age): gen summar=sum(totmar)
gen surv=cumpop-summar      /* this is somewhat meaningless */
gen pcmar=totmar/totpop
gen pcnom=totnom/totpop
/* can now do a graph for each prefecture */
sort prefcn
merge prefcn using "$path/garba90srm.dta", nokeep
/* can now do a graph for each quantile */
tab _m
drop _m
save "$path/garba90figa.dta", replace

/******************************************
* syntax:                                 *
*   figa X Y                              *
* X is # quantiles desired for separation *
* Y is marriage rule                      *
*                                         *
* compares the lowest and highest         *
* quantile group of variable YxtX; saves  *
* four graphs (all, females, males, both  *
* sexes together) as .gph and .eps in     *
* $path                                   *
******************************************/
capture program drop figa
program define figa
    args nq rule

    /* name of the quantile unit used in the legends */
    local z "Quantile"
    if `nq'==4 {
        local z "Quartile"
    }
    if `nq'==5 {
        local z "Quintile"
    }
    if `nq'==6 {
        local z "Sextile"
    }

    /* grouping variable created by xtile in the srm section, e.g. srm80xt5 */
    local grp `rule'xt`nq'

    use "$path/garba90figa.dta", clear
    collapse (sum) totpop totnom totpopf totnomf totpopm totnomm, by(`grp' age)   /* raw sums */

    /* each share is stored twice so the lowest- and highest-group lines can carry
       different variable labels in the legend */
    gen pcnom=totnom/totpop
    gen pcnom2=pcnom
    gen pcnomf=totnomf/totpopf
    gen pcnomf2=pcnomf
    gen pcnomm=totnomm/totpopm
    gen pcnomm2=pcnomm
    label var pcnom   "Lowest `z'"
    label var pcnom2  "Highest `z'"
    label var pcnomf  "Lowest `z'"
    label var pcnomf2 "Highest `z'"
    label var pcnomm  "Lowest `z'"
    label var pcnomm2 "Highest `z'"

    /* both sexes combined */
    twoway (line pcnom age if `grp'==1 & age>=14 & age<=40) ///
           (line pcnom2 age if `grp'==`nq' & age>=14 & age<=40, ///
                legend(order( - "Sex Ratio of the Marriage Market" 1 2) ring(0) pos(2) col(1)) ///
                xtitle(" ") ytitle("Share Never Married" " ") scheme(s2mono)), ///
           saving("$path/figa`nq'`rule'.gph", replace)
    graph export "$path/figa`nq'`rule'.eps", replace

    /* females */
    twoway (line pcnomf age if `grp'==1 & age>=14 & age<=40) ///
           (line pcnomf2 age if `grp'==`nq' & age>=14 & age<=40, ///
                legend(order( - "Sex Ratio of the Marriage Market" 1 2) ring(0) pos(2) col(1)) ///
                xtitle(" ") ytitle("Share of Women Never Married" " ") scheme(s2mono)), ///
           saving("$path/figa`nq'f`rule'.gph", replace)
    graph export "$path/figa`nq'f`rule'.eps", replace

    /* males */
    twoway (line pcnomm age if `grp'==1 & age>=14 & age<=40) ///
           (line pcnomm2 age if `grp'==`nq' & age>=14 & age<=40, ///
                legend(order( - "Sex Ratio of the Marriage Market" 1 2) ring(0) pos(2) col(1)) ///
                xtitle(" ") ytitle("Share of Males Never Married" " ") scheme(s2mono)), ///
           saving("$path/figa`nq'm`rule'.gph", replace)
    graph export "$path/figa`nq'm`rule'.eps", replace

    /* males and females on one graph */
    twoway (line pcnomf age if `grp'==1 & age>=14 & age<=40) ///
           (line pcnomf2 age if `grp'==`nq' & age>=14 & age<=40) ///
           (line pcnomm age if `grp'==1 & age>=14 & age<=40) ///
           (line pcnomm2 age if `grp'==`nq' & age>=14 & age<=40, ///
                legend(order( - "Sex Ratio of the Marriage Market" - "Males" 4 "Highest `z'" 3 "Lowest `z'" - "Females" 2 "Highest `z'" 1 "Lowest `z'") ring(0) pos(2) col(1) size(3)) ///
                xtitle(" ") ytitle("Share Never Married" " ") scheme(s2mono)), ///
           saving("$path/figa`nq'a`rule'.gph", replace)
    graph export "$path/figa`nq'a`rule'.eps", replace
end

figa 5 srm80
figa 5 srm20

/* Fig B. calculate percent unmarried at each age by birth cohort (and a little table of SRB during those years) */