* annual2002.do
* Builds one annual extract per year from the raw MORG files: reads the raw
* file through a dictionary, cleans and recodes variables by year, merges the
* person-match files, builds match ids, and saves the result.
log using annual2002, text replace
set memory 2000m
set more 1
*primary family and child variables added 2000-04-20 by Jean Roth, jroth@nber.org
*Y2K compliance by Jean Roth, jroth@nber.org, 2001-01-15
*Imputed highest grade completed, ihigrdc, added by Jean Roth, jroth@nber.org, 2003-01-20
*Stata 8 compliance by Jean Roth, jroth@nber.org, 2003-02-28
*matching variables added by Jean Roth, jroth@nber.org, 2003-02-12
! /bin/rm -f /tmp/aef.raw aef.dct dcthead
* run with stata6 -p0 do annual2002 to get Stata 5 format
* run with nohup stata -b do annual2002 & to run in batch mode
! echo >dcthead dictionary using /tmp/aef.raw

* aef2: process one year of data.
*   argument 1 - four-digit year, used in all the year conditions below
*   argument 2 - suffix of the raw input file and of the saved output file
*   argument 3 - suffix of the dictionary body (.dbd) file for that year
program define aef2
    display "time is $S_TIME"
    * '-A' are missing value indicators in the earlier files.
    if `1' < 1994 {
        !zcat /home/data/morg/raw/morg`2'.Z |tr "\-A" " " >/tmp/aef.raw
    }
    if `1' >= 1994 {
        !zcat /home/data/morg/raw/morg`2'.Z >/tmp/aef.raw
    }
    ! cat /home/data/morg/sources/dcthead /home/data/morg/sources/aef`3'.dbd >./aef.dct
    quietly infile using aef if age>15&age!=.
    #delimit cr
    * Last year for which a following-year match file is expected
    local latest_year=2003

    * Miscellaneous (record keeping) variables
    if `1' == 1994 {
        replace hhid = hhid94
        drop hhid94
    }
    if `1'== 1995 {
        replace hhid = hhid94 if intmonth<9
        drop hhid94
    }
    if `1' > 1995 & `1' <= 1997 {
        drop hhid94
    }

    * Geography
    display "Geography"
    generate int year = `1'
    if `1' > 1995 {
        replace smsa93 = . if smsa93<2
    }
    display "smsasize = . if smsasize <=0"
    replace smsasize = . if smsasize <=0
    if `1' == 1995 {
        replace smsastat = smsa995 if intmonth>8
        replace icntcity = icntct94 if intmonth<9
        replace centcity = centcitx if intmonth<9
        replace smsa93 = . if intmonth>=9 & smsa93<2
        replace smsa93 = . if intmonth<9
        drop pmsarank
        generate smsa80 = smsasize if intmonth < 10
    }
    if `1' > 1995 & `1' < 1998 {
        replace smsastat = smsa995
    }
    display "replace smsastat = . if smsastat==3 | smsastat<0"
    replace smsastat = . if smsastat==3 | smsastat<0
    if `1' == 1994 {
        drop smsa93
        replace centcity = centcitx
        replace icntcity = icntct94
        replace pmsarank = . if intmonth > 3
        replace pmsarank = . if pmsarank <= 0
        replace cmsarank = . if intmonth > 3
    }
    if `1' > 1995 & `1' <= 1997 {
        drop pmsarank
    }
    if `1' > 1993 & `1' < 1998 {
        drop centcitx icntct94 smsa995
    }
    if `1' > 1993 {
        replace hurespli =. if hurespli <0
        replace peinusyr = . if peinusyr < 0
    }
    display "replace centcity = . if centcity==4 | centcity<=0"
    replace centcity = . if centcity==4 | centcity<=0
    if `1' >= 1986 & `1' < 1994 {
        replace pmsarank = . if pmsarank<=0
    }
    if `1' >= 1995 & `1' <= 1997 {
        drop cmsarank
    }
    if `1' >= 1986 {
        replace icntcity = . if icntcity<=0
    }
    if `1' > 1984 & `1' < 1995 {
        replace cmsarank = . if cmsarank<=0
    }
    if `1' >= 1989 {
        replace msafips = . if msafips <=0
        replace cmsacode = . if cmsacode<=0
    }
    if `1' == 1983 {
        drop docc70 occ70
    }
    if `1' == 1985 {
        replace smsarank = . if intmonth > 9
    }
    if `1' == 1984 {
        display "drop icntcity pmsarank cmsarank"
        drop icntcity pmsarank cmsarank
    }
    * NOTE(review): the if command (unlike an if qualifier) evaluates intmonth
    * in the first observation only, so this drops the three variables for
    * every 1985 record or for none. Left unchanged; confirm the intent.
    if `1' == 1985 & intmonth < 10 {
        drop icntcity pmsarank cmsarank
    }
    if `1' == 1994 {
        replace cmsarank = . if intmonth>3
    }
    display "replace smsasize=. if smsasize<=0"
    **should line below be commented out???
    replace smsasize=. if smsasize<=0
    if `1' < 1985 {
        display "generate byte smsa70 = smsasize"
        generate byte smsa70 = smsasize
    }
    if `1' > 1985 & `1' < 1995 {
        generate byte smsa80 = smsasize
    }
    if `1' == 1985 {
        generate byte smsa70 = smsasize if intmonth<10
    }
    if `1' == 1985 {
        generate byte smsa80 = smsasize if intmonth>9
    }
    display "drop smsasize"
    drop smsasize
    if `1' > 1985 & `1' < 1989 {
        drop smsarank
    }
    if `1' < 1983 {
        display "drop unioncov unionmme occ80 ind80"
        drop unioncov unionmme occ80 ind80
    }

    * Demography
    * The chNNNN variables are 0/1 indicators recoded from chldpres; the
    * recode tables differ by period.
    display "Demography"
    if `1' > 1983 & `1' < 1994 {
        replace ownchild = . if ownchild ==0
        replace ownchild = ownchild - 1
        generate int ch613=chldpres
        generate int ch1417=chldpres
    }
    display "if `1' > 1983 & `1' < 1989 {"
    if `1' > 1983 & `1' < 1989 {
        generate int ch05= chldpres
        recode ch05 (4 6/8 = 1) (0/3 5 = 0)
        recode ch613 (3 5 7/8 = 1) (0/2 4 6 = 0)
        recode ch1417 (2 5/6 8 =1) (0/1 3/4 7 = 0)
    }
    if `1' > 1988 & `1' < 1994 {
        display "if `1' > 1988 & `1' < 1994 {"
        generate int ch02 = chldpres
        generate int ch35 = chldpres
        recode ch02 (2 6/8 12/14 16 = 1) (0/1 3/5 9/11 15 = 0)
        recode ch35 (3 6 9/10 12/13 15/16 = 1) (0/2 4/5 7/8 11 14 = 0)
        recode ch613 (4 7 9 11/12 14/16 = 1) (0/3 5/6 8 10 13 = 0)
        recode ch1417 (5 8 10/11 13/16 = 1) (0/4 6/7 9 12 = 0 )
        generate int ch05 = ch02
        replace ch05 = ch35 if ch35 == 1
        label variable ch02 "Children 0-2"
        label variable ch35 "Children 3-5"
    }
    if `1' > 1983 & `1' < 1994 {
        display "if `1' > 1983 & `1' < 1994 {"
        label variable ch05 "Children 0-5"
        label variable ch613 "Children 6-13"
        * Label corrected: it previously read 6-17, overlapping ch613.
        label variable ch1417 "Children 14-17"
    }
    if `1' == 1998 {
        drop ownchild chldpres
    }
    if `1' >= 1999 {
        replace ownchild = . if ownchild < 0
        replace chldpres = . if chldpres < 0
        generate int ch02 = chldpres
        generate int ch35 = chldpres
        generate int ch613 = chldpres
        generate int ch1417 = chldpres
        recode ch02 (1 5/7 11/13 15 = 1) (0 2/4 8/10 14 = 0)
        recode ch35 (2 5 8/9 11/12 14/15 = 1) (0/1 3/4 6/7 10 13 =0)
        recode ch613 (3 6 8 10/11 13/15 = 1) (0/2 4/5 7 9 12 = 0)
        recode ch1417 (4 7 9/10 12/15 = 1) (0/3 5/6 8 11 = 0)
        generate int ch05 = ch02
        replace ch05 = ch35 if ch35 == 1
    }
    if `1' == 1999 {
        replace ownchild = . if intmonth < 10
        replace chldpres = . if intmonth < 10
        replace ch613 = . if intmonth < 10
        replace ch1417 = . if intmonth < 10
        replace ch05 = . if intmonth < 10
        replace ch02 = . if intmonth < 10
        replace ch35 = . if intmonth < 10
    }
    if `1' > 1988 & `1' < 1992 {
        drop grade92
    }
    if `1' < 1989 {
        replace gradeat = gradeat - 1
        replace marital = 7 if marital==5
    }
    if `1' > 1991 {
        replace grade92 = . if grade92 < 0
    }
    if `1' >= 1998 {
        replace ged = . if ged < 0
        replace gedhigr = . if gedhigr < 0
        replace yrcoll = . if yrcoll < 0
        replace grprof = . if grprof < 0
        replace gr6cor = . if gr6cor < 0
        replace ms123 = . if ms123 < 0
        * Imputed highest grade completed, built from grade92 and the
        * detail items ged, gedhigr, yrcoll, grprof, gr6cor and ms123.
        generate double ihigrdc = .
        label var ihigrdc "Imputed highest grade completed"
        replace ihigrdc = 0 if grade92==31
        replace ihigrdc = 2.5 if grade92==32
        replace ihigrdc = 5.5 if grade92==33
        replace ihigrdc = 7.5 if grade92==34
        replace ihigrdc = 9 if grade92==35
        replace ihigrdc =10 if grade92==36
        replace ihigrdc =11 if grade92==37
        replace ihigrdc =12 if grade92==38
        replace ihigrdc = 0 if grade92==39 & ged==2 & gedhigr==1
        replace ihigrdc = 2.5 if grade92==39 & ged==2 & gedhigr==2
        replace ihigrdc = 5.5 if grade92==39 & ged==2 & gedhigr==3
        replace ihigrdc = 7.5 if grade92==39 & ged==2 & gedhigr==4
        replace ihigrdc = 9 if grade92==39 & ged==2 & gedhigr==5
        replace ihigrdc = 10 if grade92==39 & ged==2 & gedhigr==6
        replace ihigrdc = 11 if grade92==39 & ged==2 & gedhigr==7
        replace ihigrdc = 12 if grade92==39 & ged==2 & gedhigr==8
        replace ihigrdc = 12 if ged==1
        replace ihigrdc = 12 if grade92>=40 & grade92<=42 & yrcoll==1
        replace ihigrdc = 13 if grade92>=40 & grade92<=42 & yrcoll==2
        replace ihigrdc = 14 if grade92>=40 & grade92<=42 & yrcoll==3
        replace ihigrdc = 15 if grade92>=40 & grade92<=42 & yrcoll==4
        replace ihigrdc = 16 if grade92>=40 & grade92<=42 & yrcoll==5
        replace ihigrdc = 16 if grade92==43 & grprof==2
        replace ihigrdc = 17 if grade92==43 & gr6cor==2
        replace ihigrdc = 18 if grade92==43 & gr6cor==1
        replace ihigrdc = 17 if grade92==44 & ms123==1
        replace ihigrdc = 18 if grade92==44 & ms123>=2 & ms123<.
        replace ihigrdc = 18 if grade92==45 | grade92==46
    }
    if `1' == 1992 | `1' == 1993 {
        drop gradecp gradeat
    }
    if `1' == 1995 {
        replace relref94 = . if intmonth > 2
        replace relref95 = . if intmonth < 3
    }
    if `1' > 1995 {
        drop relref94
    }
    if `1' == 1994 {
        drop relref95
    }
    if `1' >= 1994 {
        replace veteran = . if veteran < 0
        replace penatvty = . if penatvty < 0
        replace pemntvty = . if pemntvty < 0
        replace pefntvty = . if pefntvty < 0
        replace prcitshp = . if prcitshp < 0
        replace prcitflg = . if prcitflg < 0
    }
    if `1' > 1989 {
        replace ethnic = . if ethnic < 0
        replace ethnic = . if ethnic >= 10
    }

    * Employment
    * docc80 and dind are collapsed versions of the detailed occupation and
    * industry codes; the recode table used depends on the year.
    display "Employment"
    if `1' >= 1994 {
        replace occ80 = . if occ80 <0
        replace ind80 = . if ind80 < 10
        replace class94 = . if class94<0
    }
    #delimit ;
    display "docc80";
    if `1' >= 1983 {
        display "generate int docc80=occ80;";
        generate int docc80=occ80;
        display "recode docc80";
        recode docc80 (1/6=1) (7/22=2) (23/37=3) (44/59=4) (64/68=5)
            (69/83=6) (84/89=7) (95/106=8) (113/154=9) (155/159=10)
            (178/179=11) (43 63 163/177 183/199=12) (203/208=13)
            (213/225=14) (226/235=15) (243=16) (253/257=17) (258/259=18)
            (263/278=19) (283/285=20) (303/307=21) (308/309=22)
            (313/315=23) (337/344=24) (354/357=25)
            (316/336 345/353 359/389=26) (403/407=27) (413/427=28)
            (433/444=29) (445/447=30) (448/455=31) (456/469=32)
            (503/549=33) (553/599=34) (613/699=35) (703/779=36)
            (783/799=37) (803/814=38) (823/859=39) (869=40) (875/883=41)
            (863/868 873 874 885/889=42) (473/476=43) (477/489=44)
            (494/499=45) (905=46);
    };
    display "dind";
    if `1'<= 1982 {
        display "`1' <= 1982: creating dind from ind70";
        replace dind=ind70 ;
        recode dind (18/19=1) (17=2) (47/57=3) (67/77=4) (107/109=5)
            (118=6) (119/138=7) (139/149=8) (157/168 258=9) (169=10)
            (177/198=11) (199/209=12) (219 =13) (227=14) (228/238=15)
            (239/257=16) (259=18) (268/298=19) (299=20) (307/318=21)
            (319/327=22) (328/337=23) (338/339=24) (347/369=25)
            (377/378=26) (379/387=27) (388/398=28) (407/429=29)
            (447/449=30) (467/479=31) (507/588=32) (607/698=33)
            (707/709=34) (717/718=35) (769=36) (727/748=37) (749/759=38)
            (777/798=39) (807/809=40) (838=41) (828/837 839/848=42)
            (877/879=44) (857/868=43) (849 869 887/897= 45)
            (027 028 = 46) (907/937=52) ;
    } ;
    if `1' <=1991 & `1' >=1983 {
        display "replacing dind for `1': `1' <=1991 & `1' >=1983";
        replace dind=ind80;
        recode dind (12/29=1) (10/11=2) (40/50=3) (60=4) (230/241=5)
            (242=6) (250/262=7) (270/280=8) (281/300=9) (301=10)
            (310/332=11) (340/350=12) (351=13) (352=14) (360/370=15)
            (371/382=16) (390=17) (391/392=18) (100/122=19) (130=20)
            (132/150=21) (151/152=22) (160/162=23) (171/172=24)
            (180/192=25) (200/201=26) (210/212=27) (220/222=28)
            (400/432=29) (440/442=30) (450/472=31) (500/571=32)
            (580/691=33) (700/710=34) (711/712=35) (761=36) (721/750=37)
            (751/760=38) (762/791=39) (800/810=40) (831=41)
            (812/830 832/840=42) (842/860=43) (861/871=44)
            (841 872/893=45) (30/32=46) (900/932=52) (991=51);
    } ;
    if `1' >= 1992 {
        display "replacing dind for `1': `1' >= 1992";
        replace dind=ind80;
        recode dind (12/30=1) (10/11=2) (40/50=3) (60=4) (230/241=5)
            (242=6) (250/262=7) (270/280=8) (281/300=9) (301=10)
            (310/332=11) (340/350=12) (351=13) (352=14) (360/370=15)
            (371/382=16) (390=17) (391/392=18) (100/122=19) (130=20)
            (132/150=21) (151/152=22) (160/162=23) (171/172=24)
            (180/192=25) (200/201=26) (210/212=27) (220/222=28)
            (400/432=29) (440/442=30) (450/472=31) (500/571=32)
            (580/691=33) (700/710=34) (711/712=35) (761=36) (721/750=37)
            (751/760=38) (762/791=39) (800/810=40) (831=41)
            (812/830 832/840=42) (842/860=43) (861/871=44)
            (841 872/893 =45) (032 031 =46)
            (910 922 932 900 901 921 930 931 =52) (991=51) ;
    };
    #delimit cr

    * Wages (If earnings are 0, then wage rate is missing, not zero).
    display "Wages"
    replace paidhr = . if paidhr <=0
    replace paidhre = . if paidhre <=0
    replace earnhr = . if earnhr <=0
    replace earnhre = . if earnhre <=0
    replace earnwke = . if earnwke < 0
    if `1' < 1989 {
        replace eligible = 2 if eligible == .
    }
    if `1' >= 1989 {
        replace eligible = 2 if eligible <= 0
    }
    if `1' < 1989 {
        replace I25a = 0 if I25a ==.
        replace I25b = 0 if I25b ==.
        replace I25c = 0 if I25c ==.
        replace I25d = 0 if I25d ==.
    }
    if `1' >= 1994 {
        replace I25a = 0 if I25a <= 3
        replace I25b = 0 if I25b <= 3
        *replace I25a = . if I25a < 0
        *replace I25b = . if I25b < 0
        replace earnhre = . if earnhre ==1
        * uhourse is blanked using the raw lfsr94 codes, so this must run
        * before lfsr94 itself is cleaned on the next line.
        replace uhourse = . if lfsr94 >2 | lfsr94 <1
        replace lfsr94 = . if lfsr94 < 0
    }
    if `1' >= 1996 {
        replace I25c = . if I25c < 0
        replace I25d = . if I25d < 0
    }
    if `1' == 1995 {
        replace I25c = . if I25c < 0
        replace I25d = . if I25d < 0
        replace I25c = . if intmonth < 9
        replace I25d = . if intmonth < 9
    }
    if `1' == 1994 {
        drop I25c I25d
    }

    * Hours, Unions
    display "Hours, Unions"
    if `1' >= 1994 {
        replace reason94 = . if reason94< 0
        replace absent94 = . if absent94< 0
        replace studftpt = . if studftpt< 0
        replace ftpt94 = . if ftpt94 < 0
        replace hourslw = . if hourslw < 0
        replace hourslw = 99 if hourslw >99 & hourslw != .
        replace uhourse = . if uhourse < 0
        replace why3594 = . if why3594 < 0
        replace unionmme = . if unionmme< 0
        replace unioncov = . if unioncov< 0
    }

    * Person Match
    display "Person Match"
    *mym is the year and month of the file where matching observations would be
    generate int mym = `1' * 10 + 18 if minsamp==4
    replace mym =`1' * 10 -6 if minsamp==8
    label var mym "Match year and month-in-sample"

    display "Generate id"
    * id records the current row order and is used as the final sort key
    gen id = _n

    display "Generate Sorting variables"
    display "Sort vars"
    local sort mym intmonth state hhid hhnum
    local sortnf `sort' lineno
    local sortf `sort' famnum lineno
    local sort94 `sort' famnum lineno serial
    if ( `1' <= 1984 ) {
        local sort `sortnf'
    }
    if ( `1' > 1984 & `1' <= 1994 ) {
        local sort `sortf'
    }
    if ( `1' > 1994 ) {
        local sort `sort94'
    }
    display "sort `sort'"
    sort `sort' id

    display "Household/family-level match"
    display "WARNING: This merge will generate extra observations"
    display "if obs with duplicate merge variables aren't eliminated"
    display "re http://www.stata.com/support/faqs/data/merge.html"
    local prevyear=`1'-1
    ** Matching is not possible due to sample redesigns between
    ** Jan to Sep 1985 and 1986
    ** Jul to Dec 1984 and 1985
    ** Jun to Dec 1994 and 1995
    ** Jan to Aug 1995 and 1996

    * NOTE(review): both merges below read from a path beginning /homes/
    * while every other path in this file begins /home/. Confirm.

    ** Match minsamp 8s to minsamp 4s
    **( 79:8s don't have matches )
    if (`1' > 1979 ) {
        capture describe
        scalar obs_pre = r(N)
        *** match files are created using matchYYYY.do
        display "Merging `1' 8s to 4s using match`prevyear'4.dta "
        merge `sort' using /homes/data/morg/match/match`prevyear'4.dta
        display "Drop unmatched obs from using data"
        drop if _merge==2
        capture describe
        scalar obs_post = r(N)
        * The merge must not have changed the number of observations
        assert obs_pre == obs_post
        drop _merge
    }

    ** Match minsamp 4s to minsamp 8s
    ** No matches yet for most recent year's minsamp 4s
    if ( `1' < `latest_year' ) {
        local nextyear = `1' + 1
        display "sort `sort' id"
        sort `sort' id
        capture describe
        scalar obs_pre = r(N)
        display "Merging `1' 4s to 8s using match`nextyear'8.dta "
        merge `sort' using /homes/data/morg/match/match`nextyear'8.dta
        display "Drop unmatched obs from the using data"
        drop if _merge==2
        capture describe
        scalar obs_post = r(N)
        * The merge must not have changed the number of observations
        assert obs_pre == obs_post
        drop _merge
    }

    ** Create dummy variables to verify that sex, race, and age match.
    ** A value of 1 indicates a match. Zero means no match.
    gen byte sexdif = sex == msex
    gen byte racedif = race == mrace
    ** Fix race coding scheme for 88:4 and 89:8
    * minsamp is tested in the if qualifiers so that the fix is applied
    * record by record. An if command would look at the first observation
    * only.
    if ( `1'==1988 ) {
        replace racedif=1 if minsamp==4 & race==3 & mrace>3 & mrace<.
    }
    else if ( `1'==1989 ) {
        replace racedif=1 if minsamp==8 & race>3 & race<. & mrace==3
    }
    ** In 2003, greatly expanded race categories were used.
    ** Over 98% of 2003:8 chose one race category.
    tab mrace if `1'==2003 & minsamp==8 & race > 3 & race < .

    gen byte age_mage=age-mage
    * Ages agree when the difference lies between -1 and 3 inclusive.
    * The upper bound previously used ==, so only a difference of exactly 3
    * counted as a match. A missing difference fails the upper bound.
    gen byte agedif=(age_mage>=-1 & age_mage<=3 )
    gen byte match=0
    replace match=1 if sexdif+racedif+agedif==3

    ** Make long personid string
    tostring intmonth, gen( pid_intmonth ) format( %02.0f )
    tostring hhid, gen( pid_hhid ) format ( %015.0f )
    tostring lineno, gen( pid_lineno ) format( %02.0f )
    ** Race and sex are the same, but the matching age should be included
    tostring mage, gen( pid_mage ) format( %02.0f )
    replace pid_mage = "99" if mage==.

    ** Match variables
    ** Create match variables for each time period
    ** A '_428' variable is used to match minsamp 4 to minsamp 8
    ** An '_824' variable is used to match minsamp 8 to minsamp 4
    local match_428 year minsamp pid_intmonth state pid_hhid hhnum
    local match_824 mym pid_intmonth state pid_hhid hhnum
    local mid79_428 `match_428' pid_lineno sex race age pid_mage
    local mid79_824 `match_824' pid_lineno sex race pid_mage age
    * This list previously named pid_age, a variable that is never created.
    * It now uses age, as the other _428 lists do.
    local mid85_428 `match_428' famnum pid_lineno sex race age pid_mage
    local mid85_824 `match_824' famnum pid_lineno sex race pid_mage age
    * NOTE(review): mid89_824 is defined here but never referenced below.
    local mid89_824 `match_824' famnum pid_lineno sex pid_race pid_mage age
    local mid94_428 `match_428' famnum pid_lineno pid_ser sex race age pid_mage
    local mid94_824 `match_824' famnum pid_lineno pid_ser sex race pid_mage age

    * NOTE(review): the if commands below that mention minsamp evaluate it
    * in the first observation only, so one variable list is chosen for the
    * whole file. Several branches also append a list that already starts
    * with the base keys, so those keys appear twice in the final list.
    * Left unchanged; confirm the intent.

    ** Pre-1984 matches ( includes 84:8 which matches to 83:4 )
    if (`1' < 1984 | ( `1' == 1984 & minsamp==8 ) ) {
        local match_428 `match_428' `mid79_428'
        display "match_428 is " `"`match_428'"'
        local match_824 `match_824' `mid79_824'
    }
    ** Family number became available in 1984 for matching 84:4s to 85:8s on.
    if (`1' == 1984 & minsamp==4 ) {
        local match_428 `mid85_428'
    }
    if ((`1'>1984 & `1'<1989)|( `1'==1989 & minsamp==8 )|(`1'>1989 & `1'<=1994)) {
        local match_428 `match_428' `mid85_428'
        local match_824 `match_824' `mid85_824'
    }
    ** 88:4's have a single "3 = Other" race code. Their matches,
    ** 89:8's, split into three race codes, American Indian, API, and Other
    if (`1' == 1989 & minsamp==8 ) {
        gen byte pid_race = race
        replace pid_race=3 if minsamp==8 & race>3 & race<. & mrace==3
        local match_824 `match_824' `mid79_824'
    }
    ** Serial suffix (extra unit id) became available in 1994 for matching
    if (`1' > 1994 ) {
        ** Make uppercase and remove leading and trailing blanks
        replace serial = upper(trim( serial ))
        ** Make numeric version of serial
        egen pid_serial = group( serial )
        ** A=2 rather than 1 b/c the missing value -1 becomes 1. So, make A=1, etc.
        * pid_ser is the abbreviated name of pid_serial
        assert pid_ser == 2 if serial == "A"
        replace pid_ser = pid_ser - 1
        local match_428 `match_428' `mid94_428'
        * The closing macro quote was previously a backtick, which broke
        * the expansion of mid94_824.
        local match_824 `match_824' `mid94_824'
    }
    egen str35 match_428 = concat(`match_428')
    egen str35 match_824= concat(`match_824')
    gen matchid = match_428
    replace matchid = match_824 if minsamp == 8
    drop pid_*

    * Save and exit
    compress
    saveold /home/data/morg/annual/morg`2',replace
    summarize
    ! /bin/chmod a+r /home/data/morg/annual/m*.dta
    ! /bin/chmod g+w /home/data/morg/annual/m*.dta
    ! /bin/chgrp web /home/data/morg/annual/m*.dta
    clear
    ! /bin/rm -f aef.dct /tmp/aef.raw
end

* Each block of commands does one year of the data. The data must be
* decompressed, and possibly have dashes converted to blanks.
* Then a dictionary for the particular year is prepared by
* concatenating a one line header (with the file name) to a generic
* dictionary body that covers several years of data that used the
* same format.
*Lastly, the data is read, modified, summarized and saved.
aef2 1979 79 79_83
aef2 1980 80 79_83
aef2 1981 81 79_83
aef2 1982 82 79_83
aef2 1983 83 79_83
aef2 1984 84 84_88
aef2 1985 85 84_88
aef2 1986 86 84_88
aef2 1987 87 84_88
aef2 1988 88 84_88
aef2 1989 89 89_93
aef2 1990 90 89_93
aef2 1991 91 89_93
aef2 1992 92 89_93
aef2 1993 93 89_93
aef2 1994 94 94_97
aef2 1995 95 94_97
aef2 1996 96 94_97
aef2 1997 97 94_97
aef2 1998 98 98
aef2 1999 99 98
aef2 2000 00 98
aef2 2001 01 98
aef2 2002 02 98