/*
 * Rural household income components for mainland China (1995 survey),
 * built from the household file china95.ruralh and the person file
 * china95.ruralp.
 *
 * Part one   : household-level price / value variables
 *   Module I   - read the variables of interest plus province, county and
 *                household identifiers into WORK.aa.
 *   Module II  - fill in variables that may not be zero or missing: prices
 *                of self-consumed agricultural products (b903a_1 b903a_2
 *                b903a_3 b904a_2 b904b_2 b904c_2 b904d_2 b904e_2 b904f_2
 *                b904g_2), rental value of housing (b806_2), relief
 *                payments (b504b) and house size in square metres (b1002).
 *                Zeros are treated as missing and replaced by the county
 *                mean, else the province mean, else the national mean.
 *   Module III - replace implausible prices (more than twice, or less than
 *                half, the county mean) by the county mean.
 * Part two   : household sums of the person-level income variables.
 * Part three : merge the remaining household variables with parts one/two.
 * Part four  : compute the income components RY1 ... RY8, RY and RYPC.
 *
 * Every BY-group step below is preceded by an explicit PROC SORT, so the
 * program does not depend on the stored order of the input datasets.
 */

/* Variables imputed in module II.  The first &nprice entries are the
   prices that module III additionally screens for outliers; the order of
   the three mean lists must match the order of &impvars. */
%let nprice   = 10;
%let impvars  = b903a_1 b903a_2 b903a_3 b904a_2 b904b_2 b904c_2 b904d_2
                b904e_2 b904f_2 b904g_2 b806_2 b504b b1002;
%let cntymean = b903a_1m b903a_2m b903a_3m b904a_2m b904b_2m b904c_2m b904d_2m
                b904e_2m b904f_2m b904g_2m b806_2m b504bm b1002m;
%let provmean = pb903a_1 pb903a_2 pb903a_3 pb904a_2 pb904b_2 pb904c_2 pb904d_2
                pb904e_2 pb904f_2 pb904g_2 pb806_2 pb504b pb1002;
%let natmean  = db903a_1 db903a_2 db903a_3 db904a_2 db904b_2 db904c_2 db904d_2
                db904e_2 db904f_2 db904g_2 db806_2 db504b db1002;

/* ============================ part one ============================ */

/* ---------------------------- module I ---------------------------- */

/* USERS NEED TO MODIFY THE LIBNAME STATEMENT TO SUIT THEIR CONFIGURATIONS */
*libname china95 '\sasdata\china95';
libname china95 '~/ipums/china/china1995/sasdata';

/* WORK.aa: variables of interest plus province, county and household id.
   Module II starts here as well: zeros in the imputed variables are
   recoded to missing so that they are excluded from the means below. */
data aa;
    set china95.ruralh(keep=a1 b101 province county b903a_1 nhh b903a_2
                            b903a_3 b904a_2 b904b_2 b904c_2 b904d_2 b904e_2
                            b904f_2 b904g_2 b806_2 b504b b1002 b901_1 b901_2
                            b901_3 b901a_1 b901a_2 b901a_3);

    array impval {*} &impvars;
    do _i = 1 to dim(impval);
        if impval{_i} = 0 then impval{_i} = .;
    end;
    drop _i;
run;

/* ---------------------------- module II --------------------------- */

/* BY processing needs sorted input; PROC SORT keeps the relative order of
   records with equal keys, so already-sorted data is left unchanged. */
proc sort data=aa;
    by province county;
run;

/* county means */
proc means data=aa mean noprint;
    by province county;
    var &impvars;
    output out=a1(drop=_type_ _freq_) mean=&cntymean;
run;

/* province means */
proc means data=aa mean noprint;
    by province;
    var &impvars;
    output out=a2(drop=_type_ _freq_) mean=&provmean;
run;

/* national means (a single observation) */
proc means data=aa mean noprint;
    var &impvars;
    output out=a(drop=_type_ _freq_) mean=&natmean;
run;

/* attach the county and province means to every household */
data b;
    merge aa a1;
    by province county;
run;

data b;
    merge b a2;
    by province;
run;

data c;
    set b;
    /* the one-row national means are read once and retained for all rows */
    if _n_ = 1 then set a;

    array impval {*} &impvars;
    array cmean  {*} &cntymean;
    array pmean  {*} &provmean;
    array nmean  {*} &natmean;

    /* replace missing values by the county mean if it is positive, else by
       the province mean if it is positive, else by the national mean */
    do _i = 1 to dim(impval);
        if impval{_i} = . then do;
            if cmean{_i} > 0 then impval{_i} = cmean{_i};
            else if pmean{_i} > 0 then impval{_i} = pmean{_i};
            else impval{_i} = nmean{_i};
        end;
    end;

    /* module III supplement: a county without any valid price has no county
       mean; fall back to the province mean, then to the national mean */
    do _i = 1 to &nprice;
        if cmean{_i} = . then cmean{_i} = pmean{_i};
        if cmean{_i} = . then cmean{_i} = nmean{_i};
    end;

    /* --------------------------- module III ------------------------- */
    /* a price more than twice, or less than half, the county mean is
       treated as erroneous and set to the county mean */
    do _i = 1 to &nprice;
        if (.5*impval{_i} > cmean{_i} or cmean{_i} > 2*impval{_i})
            then impval{_i} = cmean{_i};
    end;

    /* get rid of all the averages */
    drop &cntymean &provmean &natmean _i;
run;

/* Part one is finished: WORK.c holds a1 b101 province county nhh, the
   imputed variables (&impvars) and b901_1 b901_2 b901_3 b901a_1 b901a_2
   b901a_3. */

/* ============================ part two ============================ */

/* Household sums of the person-level variables. */
data e;
    set china95.ruralp(keep=a1 b101 b201 b202 b203 b204 b204a b204b b205
                            b206 b207 b208);
    /* b202 is multiplied by 12 before being summed */
    yb202 = 12*b202;
    /* use the sum of the two sub-items when it exceeds the reported total
       (a missing b204 compares below any number in SAS and is replaced) */
    if (b204a + b204b) > b204 then b204 = (b204a + b204b);
run;

proc sort data=e;
    by a1 b101;
run;

proc means data=e sum noprint;
    by a1 b101;
    var b201 yb202 b203 b204 b205 b206 b207 b208;
    output out=f sum=sb201 syb202 sb203 sb204 sb205 sb206 sb207 sb208;
run;

/* Part two is finished: WORK.f holds sb201 syb202 sb203 sb204 sb205 sb206
   sb207 sb208 (plus _type_ and _freq_) per household. */

/* =========================== part three =========================== */

/* Extract the remaining household-level variables and combine them with
   the datasets built in parts one and two. */
proc sort data=china95.ruralh(keep=a1 b101 b412 b503 b501_1 b501_2 b717
                                   b502_1 b502_2 b806_4 b904a_1 b904b_1
                                   b904c_1 b904d_1 b904e_1 b904f_1 b904g_1
                                   b507 b508 b509 b807a b504c b708b b714 b715
                                   b716 b718 b504a b505 b506 b510)
          out=hh;
    by a1 b101;
run;

/* WORK.c is in province/county order after part one */
proc sort data=c;
    by a1 b101;
run;

data g;
    merge hh c f;
    by a1 b101;
run;

/* WORK.g now has all the variables needed to calculate household income. */

/* ============================ part four =========================== */

data kk;
    set g;

    /* value of self-consumed products = quantity * (cleaned) price */
    vb901a_1 = b901a_1*b903a_1;
    vb901a_2 = b901a_2*b903a_2;
    vb901a_3 = b901a_3*b903a_3;
    vb904a_1 = b904a_1*b904a_2;
    vb904b_1 = b904b_1*b904b_2;
    vb904c_1 = b904c_1*b904c_2;
    vb904d_1 = b904d_1*b904d_2;
    vb904e_1 = b904e_1*b904e_2;
    vb904f_1 = b904f_1*b904f_2;
    vb904g_1 = b904g_1*b904g_2;

    /* rental value of home equity and interest on housing debt, both
       taken as 8 percent of the reported amount */
    rb806_4 = .08*b806_4;
    rb807a  = .08*b807a;

    /* income components */
    RY1  = sb201 + syb202 + sb203 + sb204 + sb205 + sb207 + sb208;
    RY2  = sb206 + b503;
    RY3A = b501_1 - b501_2 - b717;
    RY3B = b502_1 - b502_2;
    RY4  = vb901a_1 + vb901a_2 + vb901a_3 + vb904a_1 + vb904b_1 +
           vb904c_1 + vb904d_1 + vb904e_1 + vb904f_1 + vb904g_1;
    RY3  = RY3A + RY3B - RY4;
    RY5  = b507 + b508 + b509;
    RY6  = rb806_4 - rb807a;
    /* floor at zero; a missing RY6 also compares below zero in SAS and is
       therefore set to 0 by this statement */
    if RY6 < 0 then RY6 = 0;
    RY7  = b504b + b504c + b708b - b714 - b715 - b716 - b718;
    RY8  = b504a + b505 + b506 + b510;

    /* RY3A + RY3B equals RY3 + RY4, so the total covers components 1 to 8 */
    RY   = RY1 + RY2 + RY3A + RY3B + RY5 + RY6 + RY7 + RY8;

    /* per-capita income; a household size of zero yields a missing value
       without raising a division-by-zero error in the log */
    if nhh ne 0 then RYPC = RY/nhh;
    else RYPC = .;

    drop b901a_1 b901a_2 b901a_3 b904a_1 b904b_1 b904c_1 b904d_1 b904e_1
         b904f_1 b904g_1 b903a_1 b903a_2 b903a_3 b904a_2 b904b_2 b904c_2
         b904d_2 b904e_2 b904f_2 b904g_2 b806_2 b807a _type_ _freq_;
run;

/* THE STEP BELOW WRITES A PERMANENT SAS DATASET FOR RURAL INCOME INTO THE
   CHINA95 LIBRARY; COMMENT IT OUT IF ONLY THE TEMPORARY DATASET KK IS
   WANTED */
data china95.rincome;
    set kk;
run;