/*File created by Jessamyn Schaller (JS) and updated/modified by Marianne Bitler (MB).
Elira Kuka (EK) updated this file to include 2011 and 2012 data.
NOTICE: EK wrote EK_CHECK in front of things still needed checking or thought. Plus EK marked as *EK
all her changes and comments that are OK.
Last update: 3/14/2013
MB modified 5/15/2013 to change definitions for young adult living arrangements
EK added 2013 CPS data on 12/09/13
DC added 2014 CPS data on 29/09/14
EK added TAXSIM taxes, credits, and poverty rates 05/22/15.
DC added UNICON data for CPS year 1988B and PROP_TAX and HOUSRET for CPS year 1991 on 8/27/2015
DC added 2015 CPS data on 12/20/2015
DC added redesigned 2014 CPS data on 2/15/2016
EK changed on 8/28/16 the name of variables *inc_cs to *inc_othpr to make sure 
we are aware that both CS/Alimony and other stuff is there.
KR added 2015 and 2016 files 4/7/2017
KR added 2017 file 2/5/2017 
*/
clear all
set more off, perm

capture log close
#delimit ;


*Install TAXSIM ado file (only if it is not already installed);
*KR updated this 2/6/2018 to run on the new (taxsim27) syntax;
*Note that variable order in taxsim has slightly changed, plus new var needed;
* The which command returns a nonzero code when taxsim27 is not on the adopath,;
* so the NBER site is contacted only when an install is actually needed and an;
* offline rerun no longer aborts at this step;
* NOTE(review): the 77-88 section further down still calls taxsim9, which this;
* step does not install - confirm which version that section should use;
capture which taxsim27;
if _rc != 0 {;
	net from "http://www.nber.org/stata";
	net describe taxsim27;
	net install taxsim27, replace;
};

*EK adds local paths;
* Project root first, then the raw SPM and NAS folders derived from it;
local march "/accounts/projects/hoynes/cycles/march/";
local spm_raw "`march'spm_raw";
local nas_raw "`march'nas_raw";
*EK ends;

* Work from the project root so relative file names below resolve there;
cd "`march'";


*** NOTE STUFF WITH Z in front is for things to change at CURIE (MB) machine;
* Open the run log, overwriting the log from any previous run;
log using marchcpsfamilyhh_taxsim.log, replace;
*Z log using marchcpsfamilyhh-curie.log, replace;

* Stamp the log with the system date;
shell date;

*** Changelog;
*** was marchcpsfinal;
*** MB also changes to do family and HH stuff;

*** MB changes to use her dct files in mb directory;
*** has added some variables including total hh income for checking purposes;
*** All stuff related to small/big/CPS families is MB;
*** MB changed concept of head to be at least 15 years old;
*** MB removes some sample selection stuff because it is screwing something up;
*** MB adds bf alternate poverty measures;
*** MB fixes so ALL $ amounts are in real terms;
* EK adds r_ to all terms in real values;
* EK adds all NAS/SPM related material;
* EK adds all immigration code from the marchcpsfamilyhh_immig.do file;

*** 10/12;
*** MB fixes to make unique ID so won't be issue with collapsing;
**  Note float is unique up to about 8 million;
**  If any of ids go above 8 million, we should switch type;
**  from float to double precision;
** Note MB checked and sfamid for 1980-88 is under 1 million;
** so we can get a unique set of IDS cross year by adding 1 million;
** to 89-201X ones;
*** Also MB fixes error in SF weights;

* Reset data and matrices and turn off output paging before the main build;
clear;
clear matrix;
set more off;


*This will be the master dofile that takes the raw March CPS data and *;
*creates the collapsed data file for years 1977-2014.*;
* NOTE(review): the file header says later CPS years were added after 2014 -;
* the year range above may be out of date, confirm;
*on Baker*;
*Z on curie;
*Z cd /data/brook/cycles/march;


*** commented out for debugging;
*** all rerun recently by hand;
* The do-file calls below are disabled, so their output files must already exist;
*This takes raw CPS files and turns them into marcps7788.dta*;
*do infile;
*This takes NBER CPS files and turns them into marcps89b17.dta*;
*do lateryears_recode.do;
* This takes thresholds from JS and updates for elderly HH *;
*do make-pov-threshold-right.do // EK: Not used anymore; 
* This takes guidelines from JS and updates for pre-82 and AK/HI *;
*do update_guidelines_pre-82.do;

* NOTE(review): set mem is understood to be ignored by Stata 12 and later - confirm;
clear;
set mem 10g;

/* Take out first years while debugging
**************************;
*    77-88               		  *;
**************************;

* for debugging just use CA 93 or 63 AL;
** use /home/research/cycles/march/marcps7788 if h_state==63;
** Zuse /data/brook/cycles/march/marcps7788 if h_state==63;
* Load the 1977-88 person-level extract;
use marcps7788;
** Z use /data/brook/cycles/march/marcps7788 if year>=1980;
* Renamed so the name signals that more than child support is included;
rename p_inc_cs p_inc_othpr;

** MB comment out;
** drop f_*;
** MB end;

* One household id per combination of survey year and household sequence number;
egen hhid = group(year h_seq);


** MB to check with totals;
tabulate year, summarize(hhid);

*************************************************;
*Recoding Geographic Areas			*;
*************************************************;
* Map the old CPS state codes to FIPS codes, one line per leading digit;
recode h_state
	(11 = 23) (12 = 33) (13 = 50) (14 = 25) (15 = 44) (16 = 9)
	(21 = 36) (22 = 34) (23 = 42)
	(31 = 39) (32 = 18) (33 = 17) (34 = 26) (35 = 55)
	(41 = 27) (42 = 19) (43 = 29) (44 = 38) (45 = 46) (46 = 31) (47 = 20)
	(51 = 10) (52 = 24) (53 = 11) (54 = 51) (55 = 54) (56 = 37) (57 = 45) (58 = 13) (59 = 12)
	(61 = 21) (62 = 47) (63 = 1) (64 = 28)
	(71 = 5) (72 = 22) (73 = 40) (74 = 48)
	(81 = 30) (82 = 16) (83 = 56) (84 = 8) (85 = 35) (86 = 4) (87 = 49) (88 = 32)
	(91 = 53) (92 = 41) (93 = 6) (94 = 2) (95 = 15);
rename h_state statefip;

*No state*;
drop if statefip==0;

* Census division from the FIPS code, left missing for any code not listed;
gen division = .;
replace division = 1 if inlist(statefip, 9, 23, 25, 33, 44, 50);
replace division = 2 if inlist(statefip, 34, 36, 42);
replace division = 3 if inlist(statefip, 18, 17, 26, 39, 55);
replace division = 4 if inlist(statefip, 19, 20, 27, 29, 31, 38, 46);
replace division = 5 if inlist(statefip, 10, 11, 12, 13, 24, 37, 45, 51, 54);
replace division = 6 if inlist(statefip, 1, 21, 28, 47);
replace division = 7 if inlist(statefip, 5, 22, 40, 48);
replace division = 8 if inlist(statefip, 4, 8, 16, 35, 30, 49, 32, 56);
replace division = 9 if inlist(statefip, 2, 6, 15, 41, 53);

*This labels state and MSA values*;
do label;

* From 1986 on, fold two SMSA codes into their neighbours;
*Add Oakland and SF Together*;
replace h_smsa = 7360 if year>=1986 & h_smsa==5775;
*Add Boulder to Denver*;
replace h_smsa = 2080 if year>=1986 & h_smsa==1125;

tabulate statefip, missing;
tabulate division, missing;


***************************************************;
*Generating household variables*;
***************************************************;
*=Identified as HH Head*;
* Builds two household head flags: head (MB definition, at least 15 years old);
* and jshead (JS definition, first listed person), plus noheadhh marking;
* households that had no reported head to begin with;
gen head = 1 if p_relhd==1 | p_relhd==2; 
*One person households*;
* The canned CPS person count is spelled out in full here. The earlier spelling;
* h_numpers only resolved through variable abbreviation, because h_numpers;
* itself is not created until the household aggregates further down;
replace head = 1 if h_numpers_cps==1; 

*** MB changes to head concept;
**** Note using count works here because head is . if not 1 ******;
* fill in 0s below *;
bysort hhid: egen headcount = count(head);
*** mb 2263 of these in 77-88;
tab  headcount;

*** check that MB definition of head is more sensible than JS *;
* Mark these households for checking characteristics of JS/MB head below *;
gen noheadhh = headcount==0;
*** JS picks first one ****;
*** Can lead to infants being heads ****;
*In households full of unrelated individuals, pick the first one as head*;
* NOTE(review): bysort hhid does not fix the order of persons within a;
* household, so which record is first may vary between runs - confirm;
bysort hhid: gen h1 = 1 if _n==1; 
gen jshead = head;
replace jshead = 1 if h1==1 & headcount==0;
* MB assigns to person with h_ppind=p_pos, p_princ=1, and p_age >=15 *;
* this works for most, seems to not lead to 0 year olds being chosen *;
replace head = 1 if h_ppind ==p_pos & headcount==0 & p_princ==1 & p_age>=15;
* mark HH which are left with no head *;
drop headcount;
**** Note using count works here because head is . if not 1 ******;
* fill in 0s below ;
bysort hhid: egen headcount = count(head);
* Does not do every HH, are still some HH with noone, pick first adult 15 and up *;
* NEW with weight = HH weight*;
* code first adult 15 and older with right weight *;
gen t15plus =p_pos if p_age>=15 & h_weight==p_marwt;
bysort hhid: egen minp_posif15 = min(t15plus);
*pause on;
*pause;
replace head =1 if minp_posif15 == p_pos & headcount==0 & p_age>=15;
* Count again *;
drop headcount;
** tab count of MB measure *;
**** Note using count works here because head is . if not 1 ******;
* fill in 0s below *;
bysort hhid: egen headcount = count(head);
*Now each HH except for a small number has one head*;
tab headcount;
** tab count of JS measure *;
bysort hhid: egen jsheadcount = count(jshead);
*Now each JS HH has one head*;
tab jsheadcount; 

*** look at characteristics of JS and MB heads;
tab year if noheadhh==1;
tab year if noheadhh==1 & jshead!=head;
tab year if noheadhh==1 & jshead==head;
su p_age p_sex if jshead==1 & jshead!=head;
su p_age p_sex if head==1 & jshead!=head;

*** this still leaves a few households with only unrelated individuals and no one over 15;
*** should we drop these households?;
*** still 158 of them;
su if headcount==0;

*** all are HH with noone 15 and older;
su p_age if headcount==0;

**** for debugging;
**** pause on;
**** pause;

*** fill in head variable as 0;
* Households that still have no head keep head missing for every member;
replace head = 0 if head ==. & headcount!=0;
label variable noheadhh "Household without a head 15 and older";

drop headcount h1 jsheadcount;
*** MB end;


* Person-level 0/1 markers used to count members of each unit;
gen under18 = (p_age < 18);
gen elderly = (p_age >= 65);
gen one = 1;

* Household counts of children under 18, all persons, and persons 65 and older;
bysort hhid: egen h_kidu18  = total(under18);
bysort hhid: egen h_numpers = total(one);
bysort hhid: egen h_elderly = total(elderly);

*EK comments this out since this part of the code is now included in the infile.do file;
/*;
gen p_inc_asset = p_inc_int + p_inc_div;
*/;
* No disability income item here, so carry an all-missing placeholder;
gen p_inc_dis = .;


*** aggregate up values to household level for $ amounts and recipiency;
* The rename is wrapped in capture so a missing h_inc_cs does not stop the run;
capture rename h_inc_cs h_inc_othpr;
foreach src in ws se fr vet asset othpr sp ss pa dis ret tot earn oth {;
	bysort hhid: egen h_inc_`src' = total(p_inc_`src');
};


*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** 6/11;
*** make variable for any receipt of unemployment compensation;
gen anyunemployment = p_rec_un==1;
tab anyunemployment p_rec_un;
* Household flag is 1 when any member reports unemployment compensation;
egen h_anyinc_uc = max(anyunemployment), by(hhid year);
tab h_anyinc_uc p_rec_un;
assert h_anyinc_uc ==1 if p_rec_un==1;
* Leftover debugging breakpoint disabled, matching the other commented-out;
* pause lines in this file, so a session with pause on does not halt here;
*pause;
tab h_anyinc_uc p_rec_un;
tab h_anyinc_uc, su( f_inc_vet);
tab f_rec_inc_vet p_rec_un;
tab f_rec_inc_vet h_anyinc_uc;
*EK ends;

*** MB;
**** check that our sum of people matches canned number of people;
capture noisily assert h_numpers==h_numpers_cps;  /*EK added "capture noisily" bc of 3 mismatches */;

**** check that hh income our way matches hhincome total canned;
tab year if h_inc_tot == h_hhinctot;
tab year if h_inc_tot != h_hhinctot;

*** non matching;
list hhid year h_kidu18 h_numpers h_elderly p_age p_sex if h_inc_tot!=h_hhinctot;

**** drop canned CPS values since they always match;
drop h_numpers_cps h_hhinctot;

*** MB end;

************************************;
*Generating small family variables *;
************************************;
* Small family: one id per year, household sequence number and family position;
egen sfamid = group(year h_seq f_pos);
gen sfhead = 1 if p_pos==f_headinx;
**** Note using count works here because sfhead is . if not 1 ******;
* fill in 0s below *;
bysort sfamid: egen sfheadcount=count(sfhead);
*Check that each small family has exactly one head*;
tab sfheadcount; 

** MB notes;
*** JS had;
* One weird observation dropped*;
** no longer happening;

drop if sfheadcount==0; 

* Small family counts of children under 18, all persons, and persons 65 and older;
bysort sfamid: egen sf_kidu18 = sum(under18);
bysort sfamid: egen sf_numpers = sum(one);
bysort sfamid: egen sf_elderly = sum(elderly);

*** see if number of persons in the subfamily matches the total number of persons reported for the family;
*** suspicion is it does not, as for 80-88 we think that primary family gets number of persons and income;
*** from related subfamilies, but related subfamilies get their own family income;
tab f_kind year if sf_numpers==f_numpers;
*** all the ones that don't match are primary families;
tab f_kind year if sf_numpers!=f_numpers;

*** check that indeed number of persons for CPS canned family measure is big family concept;
gen t = 1 if f_kind==1 | f_kind==2;
egen countbigf = sum(t), by(hhid);
* countbigf is written out in full, the earlier spelling countbig only worked;
* through variable abbreviation;
su countbigf f_numpers if sf_numpers!=f_numpers;
drop t;

*** fill in sfhead;
replace sfhead=0 if sfhead==.;

drop sfheadcount;

** EK adds a few variables here when she adds Taxsim calculations;
cap rename sf_inc_cs sf_inc_othpr;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth val rnt alm div uc {;
	bysort sfamid: egen sf_inc_`n' = sum(p_inc_`n');
};

***************************************************;
*Generating big family variables*;
* pool all people related to the head with the head *;
***************************************************;
*** People related to head;
*** Note that the following variables change in 1988 because we are using 1988;
*** data consistent with the earlier files (not the rewrite file);
*** f_pos ranges from 41 to 55 in data pre-1989;
* Big family: everyone related to the household head is pooled into one unit,;
* while unrelated subfamilies and individuals keep their own family position;
gen relatedfamily = 100 if (f_kind==1 | f_kind==2) & year<=1988;
*** family if unrelated to head;
gen unrelatedfamily = f_pos if f_kind==3 & year<=1988;
** primary individual no family;
gen singlehher = f_pos if f_kind==4 & year<=1988;
gen unrelindiv = f_pos if f_kind==5 & year<=1988;

* Zero-fill so the four components can be added and grouped on. Full variable;
* names and foreach replace the abbreviated names and the obsolete for command;
foreach v of varlist relatedfamily unrelatedfamily singlehher unrelindiv {;
	replace `v' = 0 if `v'==.;
};

corr relatedfamily unrelatedfamily singlehher unrelindiv;

** check categories are mutually exclusive;
gen tmper = relatedfamily + unrelatedfamily + singlehher + unrelindiv;
tab year tmper ;
** tmper should be f_pos unless in big family, then is 100;
assert tmper==f_pos if f_kind>=3;
assert tmper==100 if f_kind==1 | f_kind==2;

tab tmper f_kind;
drop tmper;


*** Big family;
*** puts all related people together;
*** unrelated stay in their families;
egen bfamid = group(year h_seq relatedfamily unrelatedfamily singlehher unrelindiv);
** Unrelated individual, non-family householder, or unrelated subfamily;
*** stay with existing head;
gen bfhead = 1 if p_pos==f_headinx & (f_kind!=1 & f_kind!=2) & year<=1988;
*** For big family (primary family and related subfamilies), head is primary family head ;
replace bfhead = 1 if p_relhd==1 & (f_kind==1|f_kind==2) & year<=1988;
**** Note using count works here because head is . if not 1 ******;
* fill in 0s below *;
bysort bfamid: egen bfheadcount=count(bfhead);
*Check that each big family has exactly one head;
tab bfheadcount; 


*** fill in bfhead;
replace bfhead=0 if bfhead==.;
drop bfheadcount;


* Big family counts of children under 18, all persons, and persons 65 and older;
bysort bfamid: egen bf_kidu18 = sum(under18);
bysort bfamid: egen bf_numpers = sum(one);
bysort bfamid: egen bf_elderly = sum(elderly);

*** see if number of persons in the subfamily matches the total number of persons reported for the family;
*** matches for all but related subfamilies;
tab f_kind year if bf_numpers==f_numpers;
*** does not match for related subfamilies, consistent with above;
tab f_kind year if bf_numpers!=f_numpers;

** EK adds a few variables here when she adds Taxsim calculations;
cap rename bf_inc_cs bf_inc_othpr;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth {;
	bysort bfamid: egen bf_inc_`n' = sum(p_inc_`n');
};

drop relatedfamily unrelatedfamily singlehher unrelindiv; 

*** for debugging;
*** pause on;
*** pause;


*EK uncomments this out;
******************************************************;
*Generating CPS family variables for poverty purposes*;
* to compare to P60 published numbers                *;
******************************************************;
* difference pre-89/post-89 in official poverty;
* per LA paper BGH if year <89, use little family cutoff for related subfams;
* use big family for primary families;

*** Cannot make a consistent family ID for the CPS family concept;
*** that includes people to sum income over, because the big ;
*** families need the related subfamily income;
*** and the small related subfamilies need their own income only;
*** Can create a head though to get characteristics of;

* CPS family concept for 1988 and earlier: related subfamilies (f_kind 2) take;
* the small family value, every other family kind takes the big family value;
gen cfhead = cond(f_kind==2, sfhead, bfhead) if year<=1988;

**** not going to sum the way we did for households and big and small families;
**** why? because we have established that;
**** cps families are little family for related subs, but big family otherwise;
*** Set equal to relevant concept;
foreach cnt in kidu18 numpers elderly {;
	gen cf_`cnt' = cond(f_kind==2, sf_`cnt', bf_`cnt') if year<=1988;
};

* Same rule for each income component;
foreach src in ws se fr vet asset othpr sp ss pa dis ret tot earn oth {;
	gen cf_inc_`src' = cond(f_kind==2, sf_inc_`src', bf_inc_`src') if year<=1988;
};

**** check that our CPS family matches CPS canned measure;
**** doesn't match for 5 obs in 2009;
tabulate year if cf_numpers!=f_numpers;

*** check family income here cf matches CPS;
*** it does except for 245 observations;
tabulate year if cf_inc_tot==f_inc_tot;
*** doesn't match for 245 obs primary and related subfam spread evenly across years;
tabulate year if cf_inc_tot!=f_inc_tot;
*** for debugging;
*** pause on;
*** pause;

** drop canned CPS values since they always matches our value;
drop f_numpers;


/*;
* comment out;
** TOdo fix topcodes;
**********************************************************;
*Changing topcoded values for personal income and wages  *;
**********************************************************;
*Note: there is something screwy about the topcode flags in 1977.*;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn {;
	gen p_inc_`n'_adj=p_inc_`n';
	qui su p_inc_`n' if year==1977;
	qui replace p_inc_`n'_adj = p_inc_`n'*1.45 if p_inc_`n'==r(max) & year==1977;
	qui replace p_inc_`n'_adj = p_inc_`n'*1.45 if p_fl_inc_`n'==1 & year>1977;
};

gen p_inc_asset_adj = p_inc_int_adj + p_inc_div_adj;
gen p_inc_dis_adj = .;

***************************************************;
*Generating adjusted household income variables   *;
***************************************************;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth {;
bysort hhid: egen h_inc_`n'_adj = sum(p_inc_`n'_adj);
};

***************************************************;
*Generating adjusted small family income variables*;
***************************************************;
** EK adds a few variables here when she adds Taxsim calculations;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth val rnt alm div uc {;
bysort sfamid: egen sf_inc_`n'_adj = sum(p_inc_`n'_adj);
};

***************************************************;
*Generating adjusted big family income variables  *;
***************************************************;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth {;
bysort bfamid: egen bf_inc_`n'_adj = sum(p_inc_`n'_adj);
};

****************************************************;
*Generating adjusted census family income variables*;
****************************************************;
**** not going to sum the way we did for households and big and small families;
**** why? because we have established that;
**** cps families are little family for related subs, but big family otherwise;
*** Set equal to relevant concept;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth {;
gen cf_inc_`n'_adj = bf_inc_`n'_adj if year<=1988 & f_kind!=2;
replace cf_inc_`n'_adj = sf_inc_`n'_adj if year<=1988 & f_kind==2;
};

*Drop topcode flags now that we've used them*;
capture drop p_fl*; 

** end of comment out;
*/;


** EK modifies code to add TAXSIM variables 05/26/15;

**********************************************************;
*Calculate children, marital status, income at small family level  *;
**********************************************************;			

* We want to assign unrelated children as children of the head of family in primary family;
* Builds the small family inputs for TAXSIM: dependent children (depchild),;
* total dependents (depx) and the count of head and spouse aged 65 and up (agex);
* First pass counts children before unrelated children are reassigned, for comparison;
gen kid_24fts = p_age<=17 & sfhead==0 & p_age!=.;
* kid_24fts is written out in full, the earlier spelling kid only worked;
* through variable abbreviation;
egen depchild = sum(kid_24fts), by(sfamid);
tab depchild sf_kidu18, m;
drop kid_24fts depchild;

* Flag unrelated individuals under 15;
gen unrel = p_age<15 & f_kind==5;
* all unrelated children are considered heads of small family;
tab unrel sfhead, missing;

* Allocate unrelated children to primary family/primary individual;
* heads are in primary families or primary individuals;
tab head f_kind, missing;
* make sure no overlap of primary families and individuals;
gen prim_ind = f_kind==4;
egen primind_h = max(prim_ind), by(hhid);
tab f_kind primind_h, missing;

* 1/3 of (unweighted) unrelated kids is going to primary individuals;
tab unrel primind_h, missing;			

gen tmpsfamid = sfamid if f_kind==1 | f_kind==4;
* Assign the small family id to each person in the household;
egen tmpsfamid2 = max(tmpsfamid), by (hhid);
* Replace sfamid of unrel kids with that of the primary family;
replace sfamid = tmpsfamid2 if unrel==1 & tmpsfamid2!=.;

* Unrelated children no longer small family heads;
replace sfhead = 0 if unrel==1;
* Now none of them is sfhead, so can count towards kids;
tab unrel sfhead, missing;
drop tmpsfami*;

*** Child is 17 or less;
gen kid_24fts = p_age<=17 & sfhead==0 & p_age!=.;
egen depchild = sum(kid_24fts), by(sfamid);
tab depchild sf_kidu18, m;
	
*** Number of total dependents;
* Everyone in the small family except one person (the head);
gen num=1;
egen depx = sum(num), by(sfamid);	
replace depx = depx-1;
drop num;
tab depx depchild, m;
* Cap both counts at 15. foreach replaces the obsolete for command;
foreach v of varlist depx depchild {;
	replace `v' = 15 if `v'>15 & `v'!=.;
};

*** Exemptions for old age;
gen temphead=0;
replace temphead=1 if sfhead==1 & p_age>=65 & p_age!=.;
bysort sfamid: egen oldhead=total(temphead);
tab oldhead, m;

gen tempspouse=0;
replace tempspouse=1 if sfhead==0 & p_relhd==3 & p_age>=65 & p_age!=.;
bysort sfamid: egen oldspouse=total(tempspouse);
tab oldspouse, m;

gen agex=oldspouse+oldhead;
tab p_marst agex, m;


******;
* Create datafile to run through TAXSIM and then merge it to main data;
******;
* Runs one record per small family through TAXSIM and merges the resulting;
* taxes and credits back onto every person in that small family;
preserve;

*** Keep one observation for each small family, ie. the family head;
keep if sfhead==1;
tab year, m;
keep year statefip p_marst p_marwt p_age sfamid sfhead f_type sf_kidu18 depchild depx agex sf_inc_ws sf_inc_se sf_inc_fr 
	sf_inc_val sf_inc_rnt sf_inc_alm sf_inc_div sf_inc_ret sf_inc_sp sf_inc_ss sf_inc_dis sf_inc_uc;

* Missing income components count as zero. foreach replaces the obsolete for command;
foreach v of varlist sf_inc_ws sf_inc_se sf_inc_fr sf_inc_val sf_inc_rnt sf_inc_alm sf_inc_div
	sf_inc_ret sf_inc_sp sf_inc_ss sf_inc_dis sf_inc_uc {;
	replace `v' = 0 if `v'==.;
};

* Marital status;
* mstat is 2 when p_marst is below 3, 3 for other heads with dependents, else 1;
tab p_marst, m;
gen mstat=1;
replace mstat=2 if p_marst<3;
replace mstat=3 if mstat==1 & depx>0;

** Fix income variables;
* personal wages (include self-employment);
gen pwages=sf_inc_ws + sf_inc_se + sf_inc_fr;
* taxsim will not take negatives;
replace pwages=0 if pwages<0;
* dividends;
gen dividends=sf_inc_div;
* other income (interest, rent, alimony);
gen otherprop=sf_inc_val + sf_inc_rnt + sf_inc_alm;
gen pensions=sf_inc_ret;
gen gssi=sf_inc_sp + sf_inc_ss + sf_inc_dis;
gen ui=sf_inc_uc;

*Property tax is only at HH level in CPS;

*All missing variables should be 0s;
foreach x in depx depchild agex mstat pwages dividends otherprop pensions gssi ui {;
	replace `x'=0 if `x'==.;
};

*Add SOI state codes;
merge m:1 statefip using stateabb.dta, keepus(state_soi);
tab _merge;
drop if _merge==2;
drop _merge;
tab statefip state_soi, m nol;

*Change CPS year to calendar year;
replace year=year-1;
sum year;

keep sfamid year state_soi depx depchild agex mstat pwages dividends otherprop pensions gssi ui p_age;
rename state_soi state;
tab state year, m;
* NOTE(review): the top of this file installs taxsim27 but this call uses;
* taxsim9 with taxsim9 input names (agex, depchild) - confirm taxsim9 is;
* available or port this section to the taxsim27 inputs;
taxsim9, full replace;

* Did Taxsim compute all calculations?;
gen taxsim_miss= fiitax==.;
tab year taxsim_miss, m;

* fed tax post all credits;
rename fiitax sf_fedpost_taxsim;
rename siitax sf_sttax_taxsim;
* sum of employer and employee;
rename fica sf_fica_taxsim;
rename v22 sf_ctc_taxsim;
rename v23 sf_ctc_add_taxsim;
rename v25 sf_eic_taxsim;
rename v27 sf_amt_taxsim;
rename v28 sf_fedtax_taxsim;
rename v38 sf_stccc_taxsim;
rename v39 sf_steic_taxsim;

keep sfamid year *taxsim p_age depchild taxsim_miss;
mdesc;

* Zero-fill every TAXSIM output. sf_amt_taxsim is now included: it feeds;
* sf_othcred_taxsim below and sf_taxesnet_taxsim later, and leaving it missing;
* made those derived values missing while all other outputs were set to zero;
foreach v of varlist sf_fedpost_taxsim sf_sttax_taxsim sf_fica_taxsim sf_ctc_taxsim sf_ctc_add_taxsim
	sf_eic_taxsim sf_amt_taxsim sf_fedtax_taxsim sf_stccc_taxsim sf_steic_taxsim {;
	replace `v' = 0 if `v'==.;
};
* now only employee paid tax;
replace sf_fica_taxsim=sf_fica_taxsim/2;
* Other credits are the gap between pre-credit tax plus AMT and post-credit tax;
* that is not explained by the child credits and the EIC;
gen sf_othcred_taxsim = (sf_fedtax_taxsim + sf_amt_taxsim - sf_fedpost_taxsim) - (sf_ctc_taxsim + sf_ctc_add_taxsim + sf_eic_taxsim);
tab year if sf_othcred_taxsim<-1;

sum;
sum if (p_age<=24 | p_age>=65) & depchild==0;

* Zero the federal and state EIC for childless heads aged 24 or less or 65 and up;
foreach v of varlist sf_eic_taxsim sf_steic_taxsim {;
	replace `v' = 0 if (p_age<=24 | p_age>=65) & depchild==0;
};

*Change calendar year to CPS year;
replace year=year+1;
drop depchild;

tempfile taxsim;
save `taxsim', replace;
restore;

merge m:1 sfamid year using `taxsim';
drop _merge;
**EK ends TAXSIM changes;


***************************************************;
* Alternative poverty for HH and big family       *;
***************************************************;
*** can't do for small families b/c family fungible transfer amounts are 0 for all related subfams;

** see these are not 0;
** HH;
* Diagnostics on the household, family and person level inputs, then the;
* household and big family transfer and tax totals used by the alternative;
* income measures. foreach replaces the obsolete for command throughout;
foreach v in prop_tax housret fsval enrgyva {;
	tab year if year<=1988, su(h_`v');
};

** family;
foreach v of varlist f_mv_sl f_mv_fs f_houssub f_fngcare f_fngcaid {;
	tab year if year<=1988, su(`v');
};

** person;
** note no fed_ret, fica, instead get pay_dedt, renamed fica, no ctc_crd no actc_crd;
* NOTE(review): this check uses year<1988 while the two above use year<=1988 -;
* confirm whether 1988 is excluded on purpose;
foreach v of varlist p_emcontrb p_fed_tax p_eit_cred p_statetax p_fica {;
	tab year if year<1988, su(`v');
};

** family measures;
*** to confirm: related subfams get a 0 for all years;
*** all family measures are repeated across families;
* f_houssub is multiplied by 12 before being added to the other two values;
gen fnmedtransfers = f_mv_fs + f_mv_sl + f_houssub*12;
gen fmedtransfers = f_fngcare + f_fngcaid;

** only sum up for BF head;
gen tmpfnmedtransfers = fnmedtransfers if bfhead==1;
egen hnmedtransfers = sum(tmpfnmedtransfers) , by(hhid);
egen bfnmedtransfers = sum(tmpfnmedtransfers) , by(bfamid);
drop tmpfnmedtransfers;
gen tmpfmedtransfers = fmedtransfers if bfhead==1;
egen hmedtransfers = sum(tmpfmedtransfers) , by(hhid);
egen bfmedtransfers = sum(tmpfmedtransfers) , by(bfamid);
drop tmpfmedtransfers;

*EK adds the following to fix issue with household values;
** Variables at household level;
* missing values set to zero for 1981 and earlier;
replace h_enrgyva = 0 if h_enrgyva==. & year<=1981;
* for big families, primary family  gets property taxes and housret and energy assistance;
foreach x in enrgyva prop_tax housret {;
	gen temp_`x' = 0;
	replace temp_`x' = h_`x' if f_kind==1 | f_kind==4;
	egen bf_`x' = max(temp_`x'), by(bfamid);
};
drop temp_*;
*EK ends;

** EK modifies code to add TAXSIM variables 05/26/15;
* All taxsim taxes and credits: federal and state taxes, amt taxes, federal EIC, federal child credits, fica;
gen sf_taxesnet_taxsim = - sf_fedtax_taxsim - sf_sttax_taxsim - sf_amt_taxsim + sf_eic_taxsim + sf_ctc_taxsim +  
	sf_ctc_add_taxsim + sf_othcred_taxsim - sf_fica_taxsim;
* one value per family;
gen temptax = sf_taxesnet_taxsim if sfhead==1;
egen h_taxesnet_taxsim = sum(temptax), by(hhid);
egen bf_taxesnet_taxsim = sum(temptax), by(bfamid);
drop temptax;
**EK ends TAXSIM changes;


/*NOTE: EK changed from f_kind==3 to f_kind==4 in the previous paragraph, as she thinks there was previously
a mistake in the code. In the post-89 code, we assign property taxes and housret and energy assistance 
to primary families and primary individuals, but here we were assigning them to primary families and
to unrelated subfamilies. So EK fixed it to make it consistent with post-89.*/;

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** only sum up for BF head;
* School lunch value, counted once per big family through its head;
gen tmpsl = f_mv_sl if bfhead==1;
foreach lvl in h bf {;
	local byvar = cond("`lvl'"=="h", "hhid", "bfamid");
	egen `lvl'_sl = sum(tmpsl), by(`byvar');
};
drop tmpsl;
*EK ends;

*** stuff at person level;
*** employer HI contributions in all year;
egen hemcontrb  = sum(p_emcontrb), by(hhid);
egen bfemcontrb = sum(p_emcontrb), by(bfamid);

*** federal retirement contributions;
*** not there in earlier period;
*** leave out;

** taxes and credits;
* Net taxes per person: EIC received minus federal tax, state tax and FICA;
gen ptaxesnet = -p_fed_tax + p_eit_cred - p_statetax - p_fica;

*** No child tax credits;
egen htaxesnet  = sum(ptaxesnet), by(hhid);
egen bftaxesnet = sum(ptaxesnet), by(bfamid);
* version without ctc/actc, same as version with here;
gen ptaxesnet2 = ptaxesnet;
egen htaxesnet2  = sum(ptaxesnet2), by(hhid);
egen bftaxesnet2 = sum(ptaxesnet2), by(bfamid);

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
* EIC totals on their own, for the income measure that excludes the EIC;
egen heitc  = sum(p_eit_cred), by(hhid);
egen bfeitc = sum(p_eit_cred), by(bfamid);
*EK ends;

*** 10/10;
*** two versions;
*** 1  with exception of capital gains and losses, mimics def 2 from 00-08 exp. pov;
*** called defa8a2 in 97-99;
*** ftotval + fs + houssub (monthly * 12) + school lunch + energy value + eitc - fica - fed tax - state tax + cap gain - cap loss;
*** ours leaves out capital gains and losses, adds in the relevant child credits when they are available;
*** (CHANGE FROM CONFERENCE DRAFT, take out h_fed_ret, property taxes, add in energy);
*** 2 includes medical transfers, employee contribution to HI, and housing return on equity;
*** (change from conference draft, take out h_fed_ret, add in energy);
*** neither includes capital gains;
*** both include child tax credits;

*** nmedtransfers is FS + SL + housing subsidy;
*** taxes net is -statetax - fedtax + eitc -fica + child credits when there;
** other version of 1 drops enrgyva, not in til 82;;
* Household alternative income measures built from the totals computed above;
* alt1a: cash income plus non-medical transfers plus net taxes, 1980 on only;
gen h_inc_tot_alt1a = h_inc_tot + hnmedtransfers + htaxesnet  if year>=1980;
* alt1: alt1a components plus energy assistance, no year restriction;
gen h_inc_tot_alt1 = h_inc_tot + hnmedtransfers + h_enrgyva + htaxesnet ;
*** so two is 1 + fungible caid/care, + employer contrib to hi, + implied rental income from housing - property taxes;
gen h_inc_tot_alt2 = h_inc_tot + hnmedtransfers + h_enrgyva + htaxesnet - h_prop_tax + hmedtransfers + hemcontrb + h_housret;

** EK modifies code to add TAXSIM variables 05/26/15;
* Same three measures with the TAXSIM net tax total in place of htaxesnet;
gen h_inc_tot_alt1a_taxsim = h_inc_tot + hnmedtransfers + h_taxesnet_taxsim if year>=1980;
gen h_inc_tot_alt1_taxsim = h_inc_tot + hnmedtransfers + h_enrgyva + h_taxesnet_taxsim;
gen h_inc_tot_alt2_taxsim = h_inc_tot + hnmedtransfers + h_enrgyva + h_taxesnet_taxsim -  
	h_prop_tax + hmedtransfers + hemcontrb + h_housret;
**EK ends;

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** NEW 6/11;
*** version of income that excludes safety net and eitc;
*** usual cash income total minus ssi minus public assistance + net taxes minus (eitc and child tax credits) plus school lunch;
*** no child tax credits in 80-88;
* htaxesnet already contains the EIC, so heitc is subtracted to take it back out;
gen h_inc_tot_notrans = h_inc_tot - h_inc_sp - h_inc_pa + h_sl + htaxesnet - heitc;
*EK ends;

*pause;

*EK comments this out, as it was incorrect;
/*;
*** for big families, primary family  gets property taxes and housret and energy assistance;
gen bf_inc_tot_alt1 = bf_inc_tot + bfnmedtransfers + h_enrgyva + bftaxesnet if f_kind==1 | f_kind==4;
*** other families do not;
replace bf_inc_tot_alt1 = bf_inc_tot + bfnmedtransfers + bftaxesnet if f_kind!=1 & f_kind!=4;
*/

*EK adds this;
* Big family alternative income measures, using the bf_ versions of energy;
* assistance, property tax and housing return built earlier;
gen bf_inc_tot_alt1 = bf_inc_tot + bfnmedtransfers + bf_enrgyva + bftaxesnet;
** other version of 1 drops enrgyva, not in til 82;
* NOTE(review): the household version and the taxsim version of alt1a are;
* restricted to year>=1980 but this one is not - confirm which is intended;
gen bf_inc_tot_alt1a = bf_inc_tot + bfnmedtransfers + bftaxesnet;	/*EK added this*/;
gen bf_inc_tot_alt2 = bf_inc_tot + bfnmedtransfers + bftaxesnet + bfmedtransfers + bfemcontrb + 
	bf_enrgyva - bf_prop_tax + bf_housret;
*EK ends;

** EK modifies code to add TAXSIM variables 05/26/15;
* bf_enrgyva is written out in full, the earlier spelling bf_enrgyv was a typo;
* that only worked through variable abbreviation;
gen bf_inc_tot_alt1_taxsim = bf_inc_tot + bfnmedtransfers + bf_enrgyva + bf_taxesnet_taxsim;
gen bf_inc_tot_alt1a_taxsim = bf_inc_tot + bfnmedtransfers + bf_taxesnet_taxsim if year>=1980;
gen bf_inc_tot_alt2_taxsim =  bf_inc_tot + bfnmedtransfers + bf_taxesnet_taxsim + bfmedtransfers 
	+ bfemcontrb + bf_enrgyva - bf_prop_tax + bf_housret;
**EK ends;

*EK comments this out, as it was incorrect;
/*;
*** for big families, primary family  gets property taxes and housret and energy assistance;
gen bf_inc_tot_alt2 = bf_inc_tot + bfnmedtransfers + h_enrgyva + bftaxesnet - h_prop_tax + 
	bfmedtransfers + bfemcontrb + h_housret if f_kind==1 | f_kind==4; 	/*EK changed this*/;
*** other families do not;
replace bf_inc_tot_alt2 = bf_inc_tot + bfnmedtransfers + bftaxesnet + bfmedtransfers + bfemcontrb 
	if f_kind!=1 & f_kind!=4;											/*EK changed this*/;
*/;

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** NEW 6/11;
*** Big family version of income that excludes safety net and eitc;
*** usual cash income total minus ssi minus public assistance + net taxes minus (eitc and child tax credits) plus school lunch;
*** in variable terms: bf_inc_tot - bf_inc_sp - bf_inc_pa + bf_sl + bftaxesnet - bfeitc;
*** no child tax credits in 80-88, so bfeitc is the only credit subtracted here;
gen bf_inc_tot_notrans = bf_inc_tot - bf_inc_sp - bf_inc_pa + bf_sl + bftaxesnet - bfeitc;
*EK ends;

*** check big family amount summed across heads should be same as hh amount;
*** only rows with bfhead==1 carry the amount, so each big family is counted once per household;

gen tmpbfinc_tot = bf_inc_tot if bfhead==1;
egen tmphhsumbfinc = total(tmpbfinc_tot), by(hhid);

su tmphhsumbfinc h_inc_tot;
*** temporaries are dropped by their full names, not by abbreviation;
drop tmphhsumbfinc tmpbfinc_tot;

*** same comparison for the alt1 income definition;
gen tmpbfinc_tot_alt1 = bf_inc_tot_alt1 if bfhead==1;
egen tmphhsumbfincalt1 = total(tmpbfinc_tot_alt1), by(hhid);

su tmphhsumbfincalt1 h_inc_tot_alt1;
drop tmphhsumbfincalt1 tmpbfinc_tot_alt1;


***************************************************;
*Poverty Thresholds Households                    *;
***************************************************;
*** Note MB;
*** Decided (barring HH objection) for 1980, don't use official;
*** number which varies by farm/nonfarm, but use forward consistent definition;

*** also note, in general, are treating heads under 18 as adults for purposes of merging thresholds on;

*** Build the merge keys (calyear famsize kids under652person) at the household level;
generate kids = h_kidu18;
*** one fewer kid when the person has p_relhd 1 or 2 and is flagged under18 (kid is householder);
replace kids = kids - 1 if inlist(p_relhd, 1, 2) & under18 == 1;
*** top-code at 8 kids, system missing is left alone;
replace kids = 8 if kids > 8 & kids != .;
generate famsize = h_numpers;
*** top-code at 9 persons, system missing is left alone;
replace famsize = 9 if famsize > 9 & famsize != .;

*** MB added a category to pick up the separate thresholds for 1/2 person elderly families;
*** under652person takes the values;
***   1 when famsize<=2 and h_elderly is 0;
***   0 when famsize<=2 and h_elderly is 1 or more;
***  -1 when famsize>=3;
generate under652person = 1 if famsize <= 2 & h_elderly == 0;
replace under652person = 0 if famsize <= 2 & h_elderly >= 1 & !missing(h_elderly);
replace under652person = -1 if famsize >= 3;

*** income refers to the year before the survey year;
generate calyear = year - 1;

sort calyear famsize kids under652person;
merge m:1 calyear famsize kids under652person using pov_thresholds_1980_2015.dta;
* Z merge m:1 calyear famsize kids under652person using /data/brook/cycles/march/pov_thresholds_1980_2014.dta;

*** Note MB for 1980;
*** Decided (barring HH objection) for 1980, do not use the official;
*** number which varies by farm/nonfarm, but use the forward consistent definition;
*** the official 1979 thresholds use a farm/nonfarm cutoff, so 1980 survey year data are not used;

tabulate calyear _merge;
*pause on;
*pause;
*** MB most of the _merge==1 rows are 1977-79;
*** a small share are families with famsize equal to the number of kids under 18;
*** these do not match, so flag them;
*** _merge==1 means the row found no match in the threshold data;
generate problemthreshold = (_merge == 1);
tabulate year problemthreshold;
*** set threshold to missing for these observations;
*** after drop later years;
*** drop if year>=1989;
replace threshold = . if _merge == 1;
*** MB end;

*** Remove threshold cells that have no counterpart in the actual data;
drop if _merge == 2;
drop _merge;
*** MB naming h so is clear is HH;
*** Household poverty indicators from cash income (h_inc_tot) relative to the merged threshold;
*** each 0/1 flag is missing when no threshold was merged;
gen hpovlt50 = h_inc_tot<=(threshold*.5) if threshold<.;
gen hpovlt150 = h_inc_tot<=(threshold*1.5) if threshold<.;
gen hpov50100 = (threshold*.5)<h_inc_tot & h_inc_tot<=(threshold) if threshold<.;
gen hpov100200 = (threshold)<h_inc_tot & h_inc_tot<=(threshold*2) if threshold<.;
*** hpovcat: 1 = at or below 50 pct, 2 = 50-100 pct, 3 = 100-200 pct, 0 = above 200 pct;
gen hpovcat = 1 if hpovlt50==1;
	replace hpovcat = 2 if hpov50100==1;
	replace hpovcat = 3 if hpov100200==1;
	*** missing compares as larger than any number, so require a nonmissing income and threshold;
	*** otherwise a missing h_inc_tot would be classified as above 200 pct (same guard as the alt loops);
	replace hpovcat = 0 if h_inc_tot>(threshold*2) & h_inc_tot<. & threshold<.;
gen hbelowpov = h_inc_tot<(threshold) if threshold<.;
su hpov*;
tab hpovcat, missing;
tab year if hpovcat==.;
*pause;

*DC adds cuts of poverty in small bins (09/30/2014);

*** rebuild the cumulative flags hpovlt25 ... hpovlt400 in steps of 25 pct of the threshold;
drop hpovlt*;
forvalues pct = 25(25)400 {;
	generate hpovlt`pct' = (h_inc_tot <= (threshold*`pct'/100)) if !missing(threshold);
};

*** top bin: nonmissing income strictly above 4 times the threshold;
generate hpovgt400 = (!missing(h_inc_tot) & h_inc_tot > (threshold*4)) if !missing(threshold);

*** check: the two flags are complements wherever income is nonmissing;
generate tmp = hpovlt400 + hpovgt400;
tabulate tmp;
drop tmp;

*** 25-point bands hpov2550 ... hpov375400, lower bound exclusive and upper bound inclusive;
forvalues pct = 25(25)375 {;
	local k = `pct' + 25;
	generate hpov`pct'`k' = ((threshold*`pct'/100) < h_inc_tot & h_inc_tot <= (threshold*`k'/100)) if !missing(threshold);
	summarize hpov`pct'`k';
};

*** check: the bins listed here exclude the wide hpov50100 and hpov100200 bands on purpose;
egen tmp = rowtotal(hpovlt25 hpov100125 hpov125150 hpov150175 hpov175200 hpov2* hpov3* hpov5075 hpov7* hpovgt400);
count if threshold == .;
tabulate tmp;
drop tmp;

*DC ends;

** EK modifies code to add TAXSIM variables 05/26/15;
* EK makes a loop so the code is more straightforward;

** alt definition 1a (no LIHEAP), alt definition 1, alt definition 2, and the taxsim version of each;
*** every flag is missing when either the threshold or the income measure is missing;
foreach def in alt1a alt1 alt2 alt1a_taxsim alt1_taxsim alt2_taxsim {;
	generate h`def'povlt50 = (h_inc_tot_`def' <= (threshold*.5)) if !missing(threshold) & !missing(h_inc_tot_`def');
	generate h`def'povlt150 = (h_inc_tot_`def' <= (threshold*1.5)) if !missing(threshold) & !missing(h_inc_tot_`def');
	generate h`def'pov50100 = ((threshold*.5) < h_inc_tot_`def' & h_inc_tot_`def' <= (threshold)) if !missing(threshold) & !missing(h_inc_tot_`def');
	generate h`def'pov100200 = ((threshold) < h_inc_tot_`def' & h_inc_tot_`def' <= (threshold*2)) if !missing(threshold) & !missing(h_inc_tot_`def');
	*** category: 1 = at or below 50 pct, 2 = 50-100 pct, 3 = 100-200 pct, 0 = above 200 pct;
	generate h`def'povcat = 1 if h`def'povlt50 == 1 & !missing(h_inc_tot_`def');
	replace h`def'povcat = 2 if h`def'pov50100 == 1 & !missing(h_inc_tot_`def');
	replace h`def'povcat = 3 if h`def'pov100200 == 1 & !missing(h_inc_tot_`def');
	replace h`def'povcat = 0 if h_inc_tot_`def' > (threshold*2) & !missing(h_inc_tot_`def');
	generate h`def'belowpov = (h_inc_tot_`def' < (threshold)) if !missing(threshold) & !missing(h_inc_tot_`def');
	summarize h`def'pov*;
	tabulate h`def'povcat, missing;
	tabulate year if h`def'povcat == .;
};

*DC adds cuts of poverty in small bins (09/30/2014);
*** same fine bins as for cash income, repeated for each alternative household income measure;
foreach def in alt1a alt1 alt2 alt1a_taxsim alt1_taxsim alt2_taxsim {;
	*** rebuild the cumulative flags povlt25 ... povlt400;
	drop h`def'povlt*;
	forvalues pct = 25(25)400 {;
		generate h`def'povlt`pct' = (h_inc_tot_`def' <= (threshold*`pct'/100)) if !missing(threshold) & !missing(h_inc_tot_`def');
	};

	*** top bin is 0 rather than missing when the income measure is missing;
	generate h`def'povgt400 = (!missing(h_inc_tot_`def') & h_inc_tot_`def' > (threshold*4)) if !missing(threshold);

	generate tmp = h`def'povlt400 + h`def'povgt400;
	tabulate tmp;
	drop tmp;

	*** 25-point bands, lower bound exclusive and upper bound inclusive;
	forvalues pct = 25(25)375 {;
		local i = `pct' + 25;
		generate h`def'pov`pct'`i' = ((threshold*`pct'/100) < h_inc_tot_`def' & h_inc_tot_`def' <= (threshold*`i'/100)) if !missing(threshold) & !missing(h_inc_tot_`def');
		summarize h`def'pov`pct'`i';
	};

	*** check: the wide 50-100 and 100-200 bands are deliberately left out of the row total;
	egen tmp = rowtotal(h`def'povlt25 h`def'pov100125 h`def'pov125150 h`def'pov150175 h`def'pov175200 h`def'pov2* h`def'pov3* h`def'pov5075 h`def'pov7* h`def'povgt400);
	count if h_inc_tot_`def' == . | threshold == .;
	tabulate tmp;
	drop tmp;
};
*DC ends;

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
** household poverty indicators based on the no transfers income measure;
*** every flag is missing when either the threshold or h_inc_tot_notrans is missing;
generate hnotranspovlt50 = (h_inc_tot_notrans <= (threshold*.5)) if !missing(threshold) & !missing(h_inc_tot_notrans);
generate hnotranspovlt150 = (h_inc_tot_notrans <= (threshold*1.5)) if !missing(threshold) & !missing(h_inc_tot_notrans);
generate hnotranspov50100 = ((threshold*.5) < h_inc_tot_notrans & h_inc_tot_notrans <= (threshold)) if !missing(threshold) & !missing(h_inc_tot_notrans);
generate hnotranspov100200 = ((threshold) < h_inc_tot_notrans & h_inc_tot_notrans <= (threshold*2)) if !missing(threshold) & !missing(h_inc_tot_notrans);
*** category: 1 = at or below 50 pct, 2 = 50-100 pct, 3 = 100-200 pct, 0 = above 200 pct;
generate hnotranspovcat = 1 if hnotranspovlt50 == 1 & !missing(h_inc_tot_notrans);
replace hnotranspovcat = 2 if hnotranspov50100 == 1 & !missing(h_inc_tot_notrans);
replace hnotranspovcat = 3 if hnotranspov100200 == 1 & !missing(h_inc_tot_notrans);
replace hnotranspovcat = 0 if h_inc_tot_notrans > (threshold*2) & !missing(h_inc_tot_notrans);
generate hnotransbelowpov = (h_inc_tot_notrans < (threshold)) if !missing(threshold) & !missing(h_inc_tot_notrans);
summarize hnotranspov*;
tabulate hnotranspovcat, missing;
tabulate year if hnotranspovcat == .;
*EK ends;

*** cross-tabulate the below-poverty flags across income definitions;
tabulate halt1belowpov halt2belowpov;
tabulate halt1belowpov hbelowpov;
tabulate halt2belowpov hbelowpov;

*DC changes this to include smaller bins;
/*;
*** make sure alternative versions not defined for 1988;
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace halt1X = . if year==1988;
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace halt2X = . if year==1988;

*EK adds this to be consistent with previous two lines;
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace halt1aX = . if year==1988;
*EK ends;
*/;

*** make sure the alternative household versions are not defined for 1988;
*** this blanks the fine bins and also the 50-100 and 100-200 bands, povcat, belowpov and povgt400,;
*** matching what the big family section does for its alternative measures;
foreach x in alt1a alt1 alt2  alt1a_taxsim alt1_taxsim alt2_taxsim {;
	*** cumulative flags povlt25 ... povlt400 (includes povlt50 and povlt150);
	forval i=25(25)400 {;
		replace h`x'povlt`i'=. if year==1988;
	};
	*** 25-point bands pov2550 ... pov375400, the loop stops at 375 so no capture is needed;
	forval i=25(25)375 {;
		local k= `i'+25;
		replace h`x'pov`i'`k'=. if year==1988;
	};
	*** measures that the two bin loops do not reach;
	foreach s in pov50100 pov100200 povcat belowpov povgt400 {;
		replace h`x'`s' = . if year==1988;
	};
};
*DC ends;
**EK ends TAXSIM changes;

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** the no transfers measures are blanked for 1988 as well;
foreach s in povlt50 povlt150 pov50100 pov100200 povcat belowpov {;
	replace hnotrans`s' = . if year==1988;
};
*EK ends;

*** MB;
*** Keep the household threshold and its problem flag under h names, drop the merge keys;
rename threshold          hthreshold;
rename problemthreshold   hprobthreshold;

** inspect the people whose household found no threshold;
list year hhid bfamid sfamid p_age p_sex h_numpers if hprobthreshold == 1 & year >= 1981;
*** from 1981 on these must all be persons under 18 (no one over 18 in the hh);
assert p_age < 18 if hprobthreshold == 1 & year >= 1981;
drop calyear famsize kids under652person;
*** MB end;


***************************************************;
*Poverty Thresholds Small Families                *;
***************************************************;
*** Note MB;
*** Decided (barring HH objection) for 1980, don't use official;
*** number which varies by farm/nonfarm, but use forward consistent definition;

*** also note, in general, are treating heads under 18 as adults for purposes of merging thresholds on;

*** Build the merge keys (calyear famsize kids under652person) at the small family level;
generate kids = sf_kidu18;
*** one fewer kid when the person has p_relhd 1 or 2 and is flagged under18 (kid is householder);
replace kids = kids - 1 if inlist(p_relhd, 1, 2) & under18 == 1;
*** top-code at 8 kids, system missing is left alone;
replace kids = 8 if kids > 8 & kids != .;
generate famsize = sf_numpers;
*** top-code at 9 persons, system missing is left alone;
replace famsize = 9 if famsize > 9 & famsize != .;

*** MB added a category to pick up the separate thresholds for 1/2 person elderly families;
*** under652person takes the values;
***   1 when famsize<=2 and no elderly;
***   0 when famsize<=2 and sf_elderly is 1 or more;
***  -1 when famsize>=3;
*** NOTE(review): the first line tests sf_elderly>=0, which is also true for a missing sf_elderly,;
*** whereas the household section tests h_elderly==0 - confirm which treatment of missing is wanted;
generate under652person = 1 if famsize <= 2 & sf_elderly >= 0;
replace under652person = 0 if famsize <= 2 & sf_elderly >= 1 & !missing(sf_elderly);
replace under652person = -1 if famsize >= 3;

*** income refers to the year before the survey year;
generate calyear = year - 1;

sort calyear famsize kids under652person;
merge m:1 calyear famsize kids under652person using pov_thresholds_1980_2015.dta;
**Zmerge m:1 calyear famsize kids under652person using /data/brook/cycles/march/pov_thresholds_1980_2014.dta;

*** Note MB for 1980;
*** Decided (barring HH objection) for 1980, do not use the official;
*** number which varies by farm/nonfarm, but use the forward consistent definition;
*** the official 1979 thresholds use a farm/nonfarm cutoff, so 1980 survey year data are not used;

tabulate calyear _merge;
*** MB most of the _merge==1 rows are 1977-79;
*** a small share are families with famsize equal to the number of kids under 18;
*** these do not match, so flag them;
*** _merge==1 means the row found no match in the threshold data;
generate problemthreshold = (_merge == 1);
tabulate year problemthreshold;
*** set threshold to missing for these observations;
*** after drop later years;
*** drop if year>=1989;
replace threshold = . if _merge == 1;
*** MB end;

*** Remove threshold cells that have no counterpart in the actual data;
drop if _merge == 2;
drop _merge;
*** Small family poverty indicators from cash income (sf_inc_tot) relative to the merged threshold;
*** each 0/1 flag is missing when no threshold was merged;
gen sfpovlt50 = sf_inc_tot<=(threshold*.5) if threshold<.;
gen sfpovlt150 = sf_inc_tot<=(threshold*1.5) if threshold<.;
gen sfpov50100 = (threshold*.5)<sf_inc_tot & sf_inc_tot<=(threshold) if threshold<.;
*** threshold<. is an if qualifier here (it was an and-term), so the flag is missing, not 0,;
*** when there is no threshold - the same rule as the household and big family versions;
gen sfpov100200 = (threshold)<sf_inc_tot & sf_inc_tot<=(threshold*2) if threshold<.;
*** sfpovcat: 1 = at or below 50 pct, 2 = 50-100 pct, 3 = 100-200 pct, 0 = above 200 pct;
gen sfpovcat = 1 if sfpovlt50==1;
	replace sfpovcat = 2 if sfpov50100==1;
	replace sfpovcat = 3 if sfpov100200==1;
	*** missing compares as larger than any number, so require a nonmissing income and threshold;
	*** otherwise a missing sf_inc_tot would be classified as above 200 pct;
	replace sfpovcat = 0 if sf_inc_tot>(threshold*2) & sf_inc_tot<. & threshold<.;
gen sfbelowpov = sf_inc_tot<(threshold) if threshold<.;
su sfpov*;
tab sfpovcat, missing;
tab year if sfpovcat==.;

* MB;
*** Keep the small family threshold and its problem flag under sf names, drop the merge keys;
rename threshold          sfthreshold;
rename problemthreshold   sfprobthreshold;
drop calyear famsize kids under652person;

** inspect the people whose small family found no threshold;
list year hhid bfamid sfamid p_age p_sex sf_numpers if sfprobthreshold == 1 & year >= 1981;
*** from 1981 on these must all be persons under 18 (no one over 18 in the sf);
assert p_age < 18 if sfprobthreshold == 1 & year >= 1981;
* MB end;

***************************************************;
*Poverty Thresholds Big Families                  *;
***************************************************;
*** Note MB;
*** Decided (barring HH objection) for 1980, don't use official;
*** number which varies by farm/nonfarm, but use forward consistent definition;

*** also note, in general, are treating heads under 18 as adults for purposes of merging thresholds on;

*** Build the merge keys (calyear famsize kids under652person) at the big family level;
generate kids = bf_kidu18;
*** one fewer kid when the person has p_relhd 1 or 2 and is flagged under18 (kid is householder);
replace kids = kids - 1 if inlist(p_relhd, 1, 2) & under18 == 1;
*** top-code at 8 kids, system missing is left alone;
replace kids = 8 if kids > 8 & kids != .;
generate famsize = bf_numpers;
*** top-code at 9 persons, system missing is left alone;
replace famsize = 9 if famsize > 9 & famsize != .;

*** MB added a category to pick up the separate thresholds for 1/2 person elderly families;
*** under652person takes the values;
***   1 when famsize<=2 and no elderly;
***   0 when famsize<=2 and bf_elderly is 1 or more;
***  -1 when famsize>=3;
*** NOTE(review): the first line tests bf_elderly>=0, which is also true for a missing bf_elderly,;
*** whereas the household section tests h_elderly==0 - confirm which treatment of missing is wanted;
generate under652person = 1 if famsize <= 2 & bf_elderly >= 0;
replace under652person = 0 if famsize <= 2 & bf_elderly >= 1 & !missing(bf_elderly);
replace under652person = -1 if famsize >= 3;

*** income refers to the year before the survey year;
generate calyear = year - 1;

sort calyear famsize kids under652person;
merge m:1 calyear famsize kids under652person using pov_thresholds_1980_2015.dta;
*Zmerge m:1 calyear famsize kids under652person using /data/brook/cycles/march/pov_thresholds_1980_2014.dta;

*** Note MB for 1980;
*** Decided (barring HH objection) for 1980, do not use the official;
*** number which varies by farm/nonfarm, but use the forward consistent definition;
*** the official 1979 thresholds use a farm/nonfarm cutoff, so 1980 survey year data are not used;

tabulate calyear _merge;
*** MB most of the _merge==1 rows are 1977-79;
*** a small share are families with famsize equal to the number of kids under 18;
*** these do not match, so flag them;
*** _merge==1 means the row found no match in the threshold data;
generate problemthreshold = (_merge == 1);
tabulate year problemthreshold;
*** set threshold to missing for these observations;
*** after drop later years;
*** drop if year>=1989;
replace threshold = . if _merge == 1;
*** MB end;

*** Remove threshold cells that have no counterpart in the actual data;
drop if _merge == 2;
drop _merge;
*** Big family poverty indicators from cash income (bf_inc_tot) relative to the merged threshold;
*** each 0/1 flag is missing when no threshold was merged;
gen bfpovlt50 = bf_inc_tot<=(threshold*.5) if threshold<.;
gen bfpovlt150 = bf_inc_tot<=(threshold*1.5) if threshold<.;
gen bfpov50100 = (threshold*.5)<bf_inc_tot & bf_inc_tot<=(threshold) if threshold<.;
gen bfpov100200 = (threshold)<bf_inc_tot & bf_inc_tot<=(threshold*2) if threshold<.;
*** bfpovcat: 1 = at or below 50 pct, 2 = 50-100 pct, 3 = 100-200 pct, 0 = above 200 pct;
gen bfpovcat = 1 if bfpovlt50==1;
	replace bfpovcat = 2 if bfpov50100==1;
	replace bfpovcat = 3 if bfpov100200==1;
	*** missing compares as larger than any number, so require a nonmissing income and threshold;
	*** otherwise a missing bf_inc_tot would be classified as above 200 pct (same guard as the alt loop);
	replace bfpovcat = 0 if bf_inc_tot>(threshold*2) & bf_inc_tot<. & threshold<.;
gen bfbelowpov = bf_inc_tot<(threshold) if threshold<.;
su bfpov*;
tab bfpovcat, missing;
tab year if bfpovcat==.;

**** MB;
**** check HH info that big family poverty is poverty;
**** very close, off for 626 observations;
tab f_famlis bfbelowpov;


** EK modifies code to add TAXSIM variables 05/26/15;
* EK makes a loop so the code is more straightforward;

** alt definition 1a (no LIHEAP), alt definition 1, alt definition 2, and the taxsim version of each;
*** every flag is missing when either the threshold or the income measure is missing;
foreach def in alt1a alt1 alt2 alt1a_taxsim alt1_taxsim alt2_taxsim {;
	generate bf`def'povlt50 = (bf_inc_tot_`def' <= (threshold*.5)) if !missing(threshold) & !missing(bf_inc_tot_`def');
	generate bf`def'povlt150 = (bf_inc_tot_`def' <= (threshold*1.5)) if !missing(threshold) & !missing(bf_inc_tot_`def');
	generate bf`def'pov50100 = ((threshold*.5) < bf_inc_tot_`def' & bf_inc_tot_`def' <= (threshold))
		if !missing(threshold) & !missing(bf_inc_tot_`def');
	generate bf`def'pov100200 = ((threshold) < bf_inc_tot_`def' & bf_inc_tot_`def' <= (threshold*2))
		if !missing(threshold) & !missing(bf_inc_tot_`def');
	*** category: 1 = at or below 50 pct, 2 = 50-100 pct, 3 = 100-200 pct, 0 = above 200 pct;
	generate bf`def'povcat = 1 if bf`def'povlt50 == 1 & !missing(bf_inc_tot_`def');
	replace bf`def'povcat = 2 if bf`def'pov50100 == 1 & !missing(bf_inc_tot_`def');
	replace bf`def'povcat = 3 if bf`def'pov100200 == 1 & !missing(bf_inc_tot_`def');
	replace bf`def'povcat = 0 if bf_inc_tot_`def' > (threshold*2) & !missing(bf_inc_tot_`def');
	generate bf`def'belowpov = (bf_inc_tot_`def' < (threshold)) if !missing(threshold) & !missing(bf_inc_tot_`def');
	summarize bf`def'pov*;
	tabulate bf`def'povcat, missing;
	tabulate year if bf`def'povcat == .;
};


*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
** big family poverty indicators based on the no transfers income measure;
*** every flag is missing when either the threshold or bf_inc_tot_notrans is missing;
generate bfnotranspovlt50 = (bf_inc_tot_notrans <= (threshold*.5)) if !missing(threshold) & !missing(bf_inc_tot_notrans);
generate bfnotranspovlt150 = (bf_inc_tot_notrans <= (threshold*1.5)) if !missing(threshold) & !missing(bf_inc_tot_notrans);
generate bfnotranspov50100 = ((threshold*.5) < bf_inc_tot_notrans & bf_inc_tot_notrans <= (threshold))
	if !missing(threshold) & !missing(bf_inc_tot_notrans);
generate bfnotranspov100200 = ((threshold) < bf_inc_tot_notrans & bf_inc_tot_notrans <= (threshold*2))
	if !missing(threshold) & !missing(bf_inc_tot_notrans);
*** category: 1 = at or below 50 pct, 2 = 50-100 pct, 3 = 100-200 pct, 0 = above 200 pct;
generate bfnotranspovcat = 1 if bfnotranspovlt50 == 1 & !missing(bf_inc_tot_notrans);
replace bfnotranspovcat = 2 if bfnotranspov50100 == 1 & !missing(bf_inc_tot_notrans);
replace bfnotranspovcat = 3 if bfnotranspov100200 == 1 & !missing(bf_inc_tot_notrans);
replace bfnotranspovcat = 0 if bf_inc_tot_notrans > (threshold*2) & !missing(bf_inc_tot_notrans);
generate bfnotransbelowpov = (bf_inc_tot_notrans < (threshold)) if !missing(threshold) & !missing(bf_inc_tot_notrans);
summarize bfnotranspov*;
tabulate bfnotranspovcat, missing;
tabulate year if bfnotranspovcat == .;
*EK ends;

*** cross-tabulate the below-poverty flags across income definitions;
tabulate bfalt1belowpov bfalt2belowpov;
tabulate bfalt1belowpov bfbelowpov;
tabulate bfalt2belowpov bfbelowpov;


*** alternative big family poverty measures must be undefined for survey year 1988;
foreach def in alt1a alt1 alt2 alt1a_taxsim alt1_taxsim alt2_taxsim {;
	for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace bf`def'X = . if year == 1988;
};

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** the no transfers measures are blanked for 1988 in the same way;
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace bfnotransX = . if year == 1988;
*EK ends;
**EK ends TAXSIM changes;

* MB;
*** Keep the big family threshold and its problem flag under bf names, drop the merge keys;
rename threshold          bfthreshold;
rename problemthreshold   bfprobthreshold;
drop calyear famsize kids under652person;

** inspect the people whose big family found no threshold;
list year hhid bfamid sfamid p_age p_sex bf_numpers if bfprobthreshold == 1 & year >= 1981;
*** from 1981 on these must all be persons under 18 (no one over 18 in the bf);
assert p_age < 18 if bfprobthreshold == 1 & year >= 1981;
* MB end;

/*;
***************************************************;
*Poverty Thresholds Census Families               *;
***************************************************;
*** Not going to do the way we did for households and big and small families;
**** why? because we have established that;
**** cps families are little family for related subs, but big family otherwise;
*** Set equal to relevant concept;

gen cfthreshold = sfthreshold if year<=1988 & f_kind==2;
replace cfthreshold = bfthreshold if year<=1988 & f_kind!=2;

gen cfprobthreshold = sfprobthreshold if year<=1988 & f_kind==2; 
replace cfprobthreshold = bfprobthreshold if year<=1988 & f_kind!=2; 


gen cfpovlt50 = cf_inc_tot<=(cfthreshold*.5) if cfthreshold<.; 
gen cfpov50100 = (cfthreshold*.5)<cf_inc_tot & cf_inc_tot<=(cfthreshold) if cfthreshold<.;
gen cfpov100200 = (cfthreshold)<cf_inc_tot & cf_inc_tot<=(cfthreshold*2) if cfthreshold<.;
gen cfpovcat = 1 if cfpovlt50==1;
	replace cfpovcat = 2 if cfpov50100==1;
	replace cfpovcat = 3 if cfpov100200==1;
	replace cfpovcat = 0 if cf_inc_tot>(cfthreshold*2);
gen cfbelowpov = cf_inc_tot<(cfthreshold) if cfthreshold<.;

su cfpov*;
tab cfpovcat, missing;
tab year if cfpovcat==.;

*** check this with poverty measure in CPS;
*** almost 100% on, survey year 1982 forward;
*** very close in 1981 (using 1980 adjusted thresholds);
tab f_famlis cfbelowpov;

*** show nonmatches are mostly 1981 CPS except for 2 observations in 1982;
tab f_famlis cfbelowpov if year>=1982;
tab year if f_famlis==1 & cfbelowpov==0;
tab f_famlis cfbelowpov if year==1981;
*** two problematic observations in 1982;
list hhid year f_kind *threshold *_inc_tot f_famlis *belowpov p_age if f_famlis==1 & cfbelowpov==0 & cfbelowpov==0 & year>=1982;

tab year f_famlis if cfbelowpov==.;
*/;

*************************;
** Compress            **;
*************************;

quietly compress;

***************************************************;
*Poverty Guidelines                               *;
***************************************************;
*** MB adds ak/hi and pre-82;
*** merge on previous year also, as guidelines change typically in March;
*** the guideline file is keyed on calyear plus Alaska and Hawaii indicators;
generate byte ak = (statefip == 2);
generate byte hi = (statefip == 15);
generate calyear = year - 1;
sort calyear ak hi;
merge m:1 calyear ak hi using pov_guidelines_1977_2016.dta;
*Z merge m:1 calyear ak hi using /data/brook/cycles/march/pov_guidelines_1977_2014.dta;
tabulate calyear _merge;
tabulate year _merge;

*** the _merge==1 rows are 1977 year data;
*** for debugging;
*** pause on;
*** pause;
*** guideline rows with no CPS counterpart are removed;
drop if _merge == 2;
drop _merge;
*** end mb;


*** Guideline amount and income-to-guideline ratio for households (h), small families (sf) and big families (bf);
*** amount = fpl1 for one person, plus fpladdl for every person beyond the first;
foreach u in h sf bf {;
	generate `u'povguide_amt = fpl1;
	replace `u'povguide_amt = fpl1 + fpladdl*(`u'_numpers-1) if `u'_numpers > 1;
	generate `u'povguide_ratio = `u'_inc_tot/`u'povguide_amt;
};


/*;
*** Census Families;
gen cfpovguide_amt = fpl1;
replace cfpovguide_amt = fpl1 + fpladdl*(cf_numpers-1) if cf_numpers>1;
gen cfpovguide_ratio = cf_inc_tot/cfpovguide_amt;
*** MB end;
*/;

***************************************************;
*Deflate using CPI-U                              *;
** (was cpi-U X1, same from 82 on) marches HH     *;
***************************************************;
*** attach the CPI for the income year (survey year minus one);
generate incyear = year - 1;
sort incyear;
merge m:1 incyear using cpi;
*Z merge m:1 incyear using /data/brook/cycles/march/cpi;
tabulate _merge;
*only drops years from CPI data not in this CPS sample*;
*** only matched rows are kept;
drop if _merge != 3;
drop _merge;
** to do put back cf_inc*;
summarize h_inc_tot if year == 1987;
** convert to real 2009 dollars;
*** CPI for 2009 is 214.537;
*** the food stamp and energy assistance dollar amounts are deflated too;
*** each deflated copy is named r_ plus the original variable name;
foreach v of varlist p_inc_* h_inc* sf_inc* bf_inc* f_inc* h_fsval h_enrgyva {;
	quietly generate r_`v' = (`v' * 214.537)/(cpi_u);
};


***************************************************;
*Education/Weight variables                       *;
***************************************************;
*** Education dummies from highest grade (p_higrade) and the p_gradecom code;
*** code 13 with p_gradecom==1 is the high school graduate cell, 13 with p_gradecom==2 counts as less than HS;
gen lths = (p_higrade<=12);
	replace lths = 1 if p_higrade==13 & p_gradecom==2;
gen hsgrad = (p_higrade==13 & p_gradecom==1);
gen somecol = (p_higrade>13 & p_higrade<=16);
	replace somecol = 1 if p_higrade==17 & p_gradecom==2;
gen colgrad = (p_higrade==17 & p_gradecom==1) | (p_higrade>17 & p_higrade~=.);

*** edcat: 1 = lths, 2 = hsgrad, 3 = somecol, 4 = colgrad, missing if none of the four is set;
gen edcat=1 if lths==1;
	replace edcat=2 if hsgrad==1;
	replace edcat=3 if somecol==1;
	replace edcat=4 if colgrad==1;

tab edcat, missing;

*DC adds on 11/26/2014;

*** missing compares as larger than any number, so a missing p_higrade has to be excluded explicitly;
*** otherwise it would be counted as a degree holder (colgrad above applies the same kind of guard);
gen degree_2yr= (p_higrade>=14 & p_higrade<.);
gen degree_4yr= (p_higrade>=16 & p_higrade<.); 

gen enrolled= (p_ftpt_st==1 | p_ftpt_st==2);
tab2 p_attend enrolled;
gen enrolled_ft= p_ftpt_st==1;

*** defined only for a nonmissing p_higrade of 12 or more, and then equals the enrolled flag;
gen hsgrad_enrolled = (p_higrade>=12 & enrolled==1) if p_higrade>=12 & p_higrade<.;

*DC ends;

*** Household: spread the head's education category and March weight to every member;
*** non-head rows contribute 0, so the household maximum is the head's value;
generate head_edcat = cond(head == 1, edcat, 0);
generate head_wgt = cond(head == 1, p_marwt, 0);
by hhid, sort: egen h_edcat = max(head_edcat);
by hhid, sort: egen h_wgt = max(head_wgt);

*** MB;
*** check this;
summarize h_wgt h_edcat;
summarize h_wgt h_edcat if head == 1;
tabulate year if h_wgt == 0 & h_edcat == 0;
*** MB end;

drop head_edcat head_wgt;


*** Small family: spread the small family head's education category and March weight to every member;
*** non-head rows contribute 0, so the family maximum is the head's value;
generate sfhead_edcat = cond(sfhead == 1, edcat, 0);
generate sfhead_wgt = cond(sfhead == 1, p_marwt, 0);
by sfamid, sort: egen sf_edcat = max(sfhead_edcat);
by sfamid, sort: egen sf_wgt = max(sfhead_wgt);

*** check this;
summarize sf_wgt sf_edcat;
*** NOTE(review): this summary conditions on head (household head), not sfhead - confirm which is intended;
summarize sf_wgt sf_edcat if head == 1;
tabulate year if sf_wgt == 0 & sf_edcat == 0;

drop sfhead_edcat sfhead_wgt;

*** Big family: spread the big family head's education category and March weight to every member;
*** non-head rows contribute 0, so the family maximum is the head's value;
gen bfhead_edcat = edcat if bfhead==1;
replace bfhead_edcat = 0 if bfhead~=1;
gen bfhead_wgt = p_marwt if bfhead==1;
*** zero the weight on rows that are not the big family head (bfhead);
*** conditioning on head here would erase the weight of big family heads who are not the household head;
replace bfhead_wgt = 0 if bfhead~=1;
bysort bfamid: egen bf_edcat = max(bfhead_edcat);
bysort bfamid: egen bf_wgt = max(bfhead_wgt);

drop bfhead_edcat bfhead_wgt;

*** check this;
su bf_wgt bf_edcat;
*** NOTE(review): this summary conditions on head (household head), not bfhead - confirm which is intended;
su bf_wgt bf_edcat if head==1;
tab year if bf_wgt==0 & bf_edcat==0;

/*;
*** CPS family;
*** Not going to do the way we did for households and big and small families;
**** why? because we have established that;
**** cps families are little family for related subs, but big family otherwise;
*** Set equal to relevant concept;
gen cf_edcat = bf_edcat if year<=1988 & f_kind!=2;
replace cf_edcat = sf_edcat if year<=1988 & f_kind==2;

gen cf_wgt = bf_wgt if year<=1988 & f_kind!=2;
replace cf_wgt = sf_wgt if year<=1988 & f_kind==2;

*** check this;
su cf_wgt cf_edcat;
su cf_wgt cf_edcat if head==1;
tab year if cf_wgt==0 & cf_edcat==0;

*/;

***************************************************;
*Race/ethnicity/Age/Marital Status/Sex Variables  *;
***************************************************;
*** Hispanic: p_ethnicity codes from 10 up to but not including 30;
generate hisp = (p_ethnicity >= 10 & p_ethnicity < 30);
*does not know or refused to answer (codes 39 and 40)*;
replace hisp = . if inlist(p_ethnicity, 39, 40);
*** keep a separate flag for those cases, then put them back to 0 in hisp;
generate hispdkrf = (hisp == .);
replace hisp = 0 if hispdkrf == 1;

* check;
tabulate hisp, missing;
*** White non-Hispanic;
generate white = (p_race == 1 & hisp == 0 & hispdkrf == 0);
*** p_race 2 or Hispanic;
generate blackhisp = (p_race == 2 | hisp == 1);
tabulate blackhisp, missing;

* MB;
*** black is p_race 2 and non-Hispanic, other is everyone not in white, black, hisp or hispdkrf;
generate black = (p_race == 2 & hisp == 0 & hispdkrf == 0);
generate other = (black == 0 & hisp == 0 & white == 0 & hispdkrf == 0);

** check that the five groups add up to one for everyone;
generate t = white + black + hisp + other + hispdkrf;
tabulate t, missing;
drop t;
* MB end;

generate p_male = (p_sex == 1);
tabulate p_male p_sex;


*** Household: copy the household head's characteristics to every member;
*** non-head rows contribute 0, so the household maximum is the head's value;
foreach c in white blackhisp hisp hispdkrf black other p_age p_marst p_male {;
	generate head_`c' = cond(head == 1, `c', 0);
	by hhid, sort: egen h_`c' = max(head_`c');
};
*** strip the p_ part from the person-level names;
rename h_p_age   h_age;
rename h_p_marst h_marst;
rename h_p_male  h_male;
for any white blackhisp hisp hispdkrf black other p_age p_marst p_male: drop head_X;


*** Small family: copy the small family head's characteristics to every member of that family;
*** non-head rows contribute 0, so the family maximum is the head's value;
foreach x in white blackhisp hisp hispdkrf black other p_age p_marst p_male {;
	gen sfhead_`x' = `x' if sfhead==1;
	replace sfhead_`x' = 0 if sfhead~=1;
	*** group by sfamid (as the education and weight variables do), not by hhid,;
	*** grouping by household would mix the heads of different small families in one household;
	bysort sfamid: egen sf_`x' = max(sfhead_`x');
	drop sfhead_`x';
};
*** strip the p_ part from the person-level names;
rename sf_p_age sf_age;
rename sf_p_marst sf_marst;
rename sf_p_male sf_male;

*** Big family: copy the big family head's characteristics to every member of that family;
*** non-head rows contribute 0, so the family maximum is the head's value;
foreach x in white blackhisp hisp hispdkrf black other p_age p_marst p_male {;
	gen bfhead_`x' = `x' if bfhead==1;
	replace bfhead_`x' = 0 if bfhead~=1;
	*** group by bfamid (as the education and weight variables do), not by hhid,;
	*** grouping by household would mix the heads of different big families in one household;
	bysort bfamid: egen bf_`x' = max(bfhead_`x');
	drop bfhead_`x';
};
*** strip the p_ part from the person-level names;
rename bf_p_age bf_age;
rename bf_p_marst bf_marst;
rename bf_p_male bf_male;

/*;
*** CPS family;
*** Not going to do the way we did for households and big and small families;
**** why? because we have established that;
**** cps families are little family for related subs, but big family otherwise;
*** Set equal to relevant concept;
for any white blackhisp hisp hispdkrf black age marst male: gen cf_X = bf_X if year<=1988 & f_kind!=2;
for any white blackhisp hisp hispdkrf black age marst male: replace cf_X = sf_X if year<=1988 & f_kind==2;
*/;

***************************************************;
* Health Insurance Variables                      *;
***************************************************;

*** going to try to get a clean household measure;
*** also something we can look at at the family levels;
*** Variables;
*** A) Asked consistently and recoded by Census 80-88;
*** For workers last year: Was this person on group HI the employer paid for (and who else covered by this);
*** For 15 and older: Was this person on Medicare? Set to 0 for kids;
*** For 15 and older: Was this person on Medicaid? Children who would have been expected to be covered;
*** 	are coded as yes (e.g., kids in AFDC families);
*** For 15 and older: Was this person on Champus or other military health care?;

*** B) Only recoded and asked 80, 83-88;
*** For 15 and older: Was this person covered by other insurance and who else in HH covered by this?;
*** I think 
*** ;
*** the non-recoded version was also asked in 1982 (documentation says the recoded version is there, but it isn't);

*** Recode each raw coverage item (1 = yes, 2 = no) into a 0/1 indicator, any other code becomes missing;
*** each raw item is tabulated against its recode and then dropped;

*** Medicare: only for 15 plus, 80 on, recode;
generate p_medicare = cond(p_covmedcr == 1, 1, cond(p_covmedcr == 2, 0, .));
tabulate p_medicare p_covmedcr if year >= 1980, missing;
drop p_covmedcr;


*** Medicaid: only asked 15 plus, 80 on, recoded to include kids;
generate p_medicaid = cond(p_covmedcd == 1, 1, cond(p_covmedcd == 2, 0, .));
tabulate p_medicaid p_covmedcd if year >= 1980, missing;
drop p_covmedcd;


*** Employer group HI: 80 on, recoded to include kids;
generate p_empgrouphi = cond(p_covergh == 1, 1, cond(p_covergh == 2, 0, .));
tabulate p_empgrouphi p_covergh if year >= 1980, missing;
drop p_covergh;

*** Military HI: 80 on, recoded to include kids;
generate p_militarhi = cond(p_covercp == 1, 1, cond(p_covercp == 2, 0, .));
tabulate p_militarhi p_covercp if year >= 1980, missing;
drop p_covercp;

*** Other HI: 80, 83-88, 15 plus, recoded to include kids;
*** going to punt on inputs to this otherwise as would be hard to assign to right kids;
*** and 81 has no data;
generate p_othhi = cond(p_coverhi == 1, 1, cond(p_coverhi == 2, 0, .));
tabulate p_othhi p_coverhi if year >= 1980, missing;
*** check years are 77-79, 81 82;
tabulate year if p_othhi == .;
drop p_coverhi;

*** our variables;
*** public will include military;
*** each composite is defined only when all of its inputs are nonmissing;
generate p_publichi = (p_medicaid == 1 | p_medicare == 1 | p_militarhi == 1) if !missing(p_medicaid, p_medicare, p_militarhi);
label variable p_publichi "Medicaid/Medicare/Military";

*** NOTE(review): the labels say missing 82-83 while the notes on p_othhi say no data for 81 and 82 - confirm the years;
generate p_anyhi1 = (p_publichi == 1 | p_empgrouphi == 1 | p_othhi == 1) if !missing(p_publichi, p_empgrouphi, p_othhi);
label variable p_anyhi1 "Medicaid/Medicare/Military/Employer provided Group/Other HI, missing 82-83";

generate p_nohi1 = 1 - p_anyhi1;
label variable p_nohi1 "No Medicaid/Medicare/Military/Employer provided Group/Other HI, missing 82-83";

*** second version leaves out other HI, so it is available in every year the inputs are;
generate p_anyhi2 = (p_publichi == 1 | p_empgrouphi == 1) if !missing(p_publichi, p_empgrouphi);
label variable p_anyhi2 "Medicaid/Medicare/Military/Employer provided Group";

generate p_nohi2 = 1 - p_anyhi2;
label variable p_nohi2 "No Medicaid/Medicare/Military/Employer provided Group excludes individual/retiree";

*** checks;
for any 1 2: tab p_anyhiX p_nohiX;

for any public medicaid medicare empgrouphi militarhi othhi anyhi1 anyhi2: tab year p_X;

tabulate p_medicaid p_medicare if p_militarhi == 1, summarize(p_publichi);

tabulate p_medicaid p_medicare if p_militarhi == 0, summarize(p_publichi);

*** variables for kids;
*** stick to under 18 even though medicaid can be for 18 year olds;
*** each kid version is the person-level indicator multiplied by an under 18 flag,;
*** so it is 0 for everyone 18 or over and is tabulated separately for the two age groups;
generate p_kidcaid = p_medicaid * (p_age < 18);
tabulate p_medicaid p_kidcaid if p_age < 18;
tabulate p_medicaid p_kidcaid if p_age >= 18;

generate p_kidpub = p_publichi * (p_age < 18);
tabulate p_publichi p_kidpub if p_age < 18;
tabulate p_publichi p_kidpub if p_age >= 18;

foreach m in anyhi1 anyhi2 nohi1 nohi2 {;
	generate p_kid`m' = p_`m' * (p_age < 18);
	tabulate p_`m' p_kid`m' if p_age < 18;
	tabulate p_`m' p_kid`m' if p_age >= 18;
};

*****************************************;
*** Measures for anyone in HH/Families***;
*****************************************;

*** For each coverage indicator, take the maximum over the members of the unit;

**** Household (grouped by hhid);
foreach m in medicaid publichi  empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 {;
	by hhid, sort: egen h_any`m' = max(p_`m');
};

**** Small family (grouped by sfamid);
foreach m in medicaid publichi  empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 {;
	by sfamid, sort: egen sf_any`m' = max(p_`m');
};

**** Big family (grouped by bfamid);
foreach m in medicaid publichi  empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 {;
	by bfamid, sort: egen bf_any`m' = max(p_`m');
};

/*;
*** Census family;
*** CPS family;
*** Not going to do the way we did for households and big and small families;
**** why? because we have established that;
**** cps families are little family for related subs, but big family otherwise;
*** Set equal to relevant concept;
for any medicaid publichi  empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 : gen cf_anyX = bf_anyX if year<=1988 & f_kind!=2;
for any medicaid publichi  empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 : replace cf_anyX = sf_anyX if year<=1988 & f_kind==2;
*/;

******************************************************************************;
* Food Stamps, School Lunch, LIHEAP, Tenure, Public Housing, Rental subsidy  *;
******************************************************************************;
*** Only household concepts;

*** Tenure dummies: the left out category is tenure==3, no cash rent;
*** both dummies stay missing when h_tenure is missing;
gen h_rents = (h_tenure==2) if !missing(h_tenure);
gen h_owns  = (h_tenure==1) if !missing(h_tenure);
foreach t in rents owns {;
	tabulate h_`t' h_tenure;
};
label var h_rents "HH rents for cash";
label var h_owns "HH owned or being bought";

*** Public housing and rent subsidy: check against tenure, then turn code 2 into 0;
*** most of the niu cases on both items are owned;
foreach item in public rentsub {;
	tabulate h_tenure h_`item';
	recode h_`item' (2=0);
};

*** tenure is not needed after this point;
drop h_tenure;


*** Food stamps: code 2 becomes 0, then look at receipt by year and the related variables;
recode h_foodstp (2=0);
tabulate year h_foodstp, missing;

tabulate h_foodstp, summarize(h_fsnum);

summarize h_foodstp h_fsnum h_fsmo r_h_fsval;

*** Lunch items h_hhotlun and h_freelun: code 2 becomes 0;
recode h_hhotlun h_freelun (2=0);
foreach item in hhotlun freelun {;
	tabulate year h_`item', missing;
};
summarize h_hhotlun h_freelun h_hnumhot;

*** Energy assistance: weird small number of missings, leave as 0;
recode h_enrgyas (2=0);
tabulate year h_enrgyas, missing;

summarize h_enrgyas r_h_enrgyva;


*EK adds living arrangement variables;
****************************************;
** Living Arrangements		       	   *;
****************************************;
* Inspect the raw inputs used below before building anything;
foreach v of varlist p_ftpt p_famrel p_relhd p_attend {;
	tab `v', missing;
};
* f_kind and p_famtyp are the same in all observations, but the numbers have different definitions;
tab f_kind p_famtyp, missing;

* f_kindtemp is f_kind renumbered to match the post-89 f_kind numbers and definitions;
* old f_kind: 1 family, 2 related subfamily, 3 unrelated sub, 4 nonfamily hholder, 5 unrel indiv;
* f_kindtemp: 1 family, 2 nonfamily hholder, 3 related subfamily, 4 unrelated sub, 5 unrel indiv;
* codes 1 and 5 keep their value, so only 2, 3 and 4 need a rule;
gen f_kindtemp = f_kind;
recode f_kindtemp (2 = 3) (3 = 4) (4 = 2);

* Number of people in household and in big family already exist as h_numpers and bf_numpers;

* h_numfams: families per household, counted through their small-family heads;
gen family = sfhead==1 & inlist(f_kindtemp, 1, 3, 4);
bysort hhid: egen h_numfams = total(family);
tab h_numfams, missing;

* h_numsfheads: small family heads per household;
bysort hhid: egen h_numsfheads = total(sfhead);
tab h_numsfheads, missing;

* h_numothadults: adults (18+) other than the householder or spouse of the householder (p_relhd 3);
gen othadult = head==0 & p_relhd!=3 & p_age>=18;
bysort hhid: egen h_numothadults = total(othadult);

* Presence in the household of a young adult (18-30) not in college and not working full time;
* HH asks for two variables: version 1 leaves out the school enrolment variable p_attend
  (present from 1986 onwards) and version 2 also requires p_attend!=1;
* In post-89 the variable name is different but has the same meaning, and a_ptft is coded differently there (FT==2);
local ya_base "inrange(p_age, 18, 30) & p_ftpt!=1 & p_esr!=5";
local ya_rule1 "`ya_base'";
local ya_rule2 "`ya_base' & p_attend!=1";
forvalues ver = 1/2 {;
	* person flag, household count, household any-flag, then drop the two scratch variables;
	gen yadult = `ya_rule`ver'';
	bysort hhid: egen h_numyadults = total(yadult);
	gen h_anyyadults`ver' = h_numyadults>0;
	tab h_numyadults h_anyyadults`ver', missing;
	drop yadult h_numyadults;
};

* Young adult sample flag: everyone aged 18 to 30;
gen yadult = inrange(p_age, 18, 30);

* yadult_alone: young adult in a household with no families, that is alone or with other unrelated individuals;
tab h_numfams yadult, missing;
gen yadult_alone = h_numfams==0 & yadult==1;
tab h_numfams yadult_alone, missing;

* Is the head of the HH or of the small family a young adult?;
* builds headyadult and h_headyadult (within hhid), then sfheadyadult and sf_headyadult (within sfamid);
local hd_flags head sfhead;
local hd_units hhid sfamid;
local hd_stubs h sf;
forvalues k = 1/2 {;
	local hd_flag : word `k' of `hd_flags';
	local hd_unit : word `k' of `hd_units';
	local hd_stub : word `k' of `hd_stubs';
	gen `hd_flag'yadult = yadult==1 & `hd_flag'==1;
	bysort `hd_unit': egen `hd_stub'_headyadult = total(`hd_flag'yadult);
	tab h_numfams `hd_flag'yadult, missing;
	tab f_kindtemp `hd_stub'_headyadult, missing;
};

* Young adult living with parents;
* yadult_par starts at 0 for everyone and is switched on in two steps, with a check tab after each step;
gen yadult_par = 0;
replace yadult_par = 1 if yadult==1 & p_famrel==3; 		/*the young adults is child of the sf head*/;
for any h_numfams f_kindtemp: tab X yadult_par, missing; 	/*seems ok*/;

replace yadult_par = 1 if yadult==1 & p_relhd==4;   	/*young adult is child of the hh head*/;
for any h_numfams f_kindtemp: tab X yadult_par, missing; 	/*seems ok*/;

/*; 
*can't do this part (like in post-89 period) because variable not detailed enough ;
replace yadult_par = 1 if yadult==1 & p_relhd==11;		/*young adult is foster child of the hh head*/;
for any h_numfams f_kindtemp: tab X yadult_par, missing;	/*foster kids seem to be secondary indivs*/;

gen temp1 = h_headyadult==1 & p_relhd==8;
bysort hhid: egen temp2=total(temp1);
replace yadult_par = 1 if headyadult==1 & temp2>0;      /*are parents of young head present in hh?*/;
for any h_numfams f_kindtemp: tab X yadult_par, missing;
*initially, EK was also including step children, but she noticed this variable was discontinued in 1993; 
*/;

* Young adult living with other relatives;
* yadult_rel starts at 0 for everyone and is switched on by the three rules below;
gen yadult_rel = 0;
replace yadult_rel = 1 if yadult==1 & p_famrel==4; /*the young adults is relative of the sf head*/;
*young adult is sibling or other relative of the household head;
replace yadult_rel = 1 if yadult==1 & p_relhd==5; 
for any h_numfams f_kindtemp: tab X yadult_rel, missing; 

* temp1 marks relatives (p_famrel 4) in a small family whose head is a young adult;
* temp2 counts them within the small family;
gen temp1 = sf_headyadult==1 & p_famrel==4;
bysort sfamid: egen temp2=total(temp1);
tab f_kindtemp temp2, missing;
replace yadult_rel = 1 if sfheadyadult==1 & temp2>0;      /*are relatives of young sf head present in sf?*/;
for any h_numfams f_kindtemp: tab X yadult_rel, missing;

* Young adult living with others: a young adult in none of the alone, parents or relatives groups;
gen yadult_oth = yadult==1 & yadult_rel==0 & yadult_par==0 & yadult_alone==0;

* Set the four categories to missing for anyone who is not a young adult;
* By doing this, when we collapse at the state level we have averages for the young adults sample only;
foreach grp in alone par rel oth {;
	replace yadult_`grp' = . if yadult==0;
};
* temp3 counts how many of the four categories each person falls in;
gen temp3 = yadult_rel + yadult_par + yadult_alone + yadult_oth;
tab yadult temp3, missing;

* Remove scratch variables, where the wildcard matches headyadult, h_headyadult, sfheadyadult and sf_headyadult;
drop f_kindtemp temp1 temp2 temp3 family *eadyadult;

* Add labels;
label variable h_numfams "Number of families in HH";
label variable h_numsfheads "Number of small family heads in HH";
label variable h_numothadults "Number of other adults in HH";
label variable h_anyyadults1 "Presence of young adults not in school and not working FT in HH, Ver 1";
label variable h_anyyadults2 "Presence of young adults not in school and not working FT in HH, Ver 2";
label variable yadult "Young adult (18-30)";
label variable yadult_alone "Young adult living alone or with unrelated individuals";
label variable yadult_par "Young adult living with parents";
label variable yadult_rel "Young adult living with other relatives";
label variable yadult_oth "Young adult living with other people";

*EK ends here;

*DC adds living arrangement variables from micro.do, lines 93-509;

*use micro , clear;

*** calyear is the survey year minus one;
*** any existing copy is dropped first (capture keeps the drop from failing when there is none);
capture drop calyear;
generate calyear = year - 1;


*********************************************************
*********************************************************
*********************************************************
***       understand better the young adult stuff    ****
***       suggest alternative codings
*********************************************************
*********************************************************
*********************************************************

*********************************************************
***       understand better the young adult stuff    ****
*********************************************************

*** in 89 on period, know parentline if they are in house;
*** see how good a job we are doing;

*DC notes: p_parentline variable not present in 80-88 dataset;
/*
for any alone par rel oth:  tab p_parentline yadult_X if p_age>=18 & p_age<=30;
*** for alone, makes sense, almost none of those alone have any parent in HH;
tab p_parentline p_famrel if  yadult_alone==1 & p_parentline!=0;
*** ftype==5;
tab p_parentline f_type if  yadult_alone==1 & p_parentline!=0, nolab;

*** for par a small number of parents lines defined, but not yadult_par==1;
tab p_parentline p_famrel if  yadult_par==0 & p_parentline!=0;

*** for rel, most of parent line is 2 relationship;
tab p_parentline p_famrel if  yadult_rel==1 & p_parentline!=0;

** see marital status for lots of the yadults;
tab  p_marst yadult_oth if yadult==1;
tab  p_marst yadult_rel if yadult==1;

*** try to see who others are;
*** evenly split between heads and others;
*** how many are heads;
tab head yadult_oth;

*** look at relation to head;
*** many heads;
*** many wives;
*** wives of heads , husbands of heads;
tab p_famrel if yadult_oth==1;
*** many married;
tab p_marst if yadult_oth==1;

*** many heads; 
tab head if yadult_oth==1;

** many wives and ref. person with relatives;
tab p_relhd if yadult_oth==1 & h_numfams==1 & year>=1989;
*/
*****************************************************
*** Now try to define consistent value MB suggests **
*****************************************************

*** try to think through how to define;
*** young adult living alone, no other families;
*** alternative would say, I am a young adult, no other families I am related to;
*** see how different that is;
*** this way might include POSSLQs;

*** Start every myadult_ category at 0 for young adults and leave it missing for everyone else;
*** NOTE(review): alonall looks like a misspelling of aloneall, and myadult_alonall, myadult_alone and myadult_married are not filled in anywhere in this part of the file - confirm whether they are needed;
foreach cat in alonall alonenokid alonewkid alone marriednokid marriedwkid married par rel {;
	gen myadult_`cat' = 0 if yadult==1;
};

***************************;
*** Alone, no kids      ***;
***************************;
*** head, no relatives: p_relhd 2 or 7 through 1988, p_relhd 2 or 14 from 1989 on;
replace myadult_alonenokid = 1 if yadult==1 & inlist(p_relhd, 2, 7) & year<=1988;
replace myadult_alonenokid = 1 if yadult==1 & inlist(p_relhd, 2, 14) & year>=1989;

*** compare with the current EK version (correlation is 0.79 for CA);
tabulate myadult_alonenokid yadult_alone;
correlate myadult_alonenokid yadult_alone;

*** mine is 1, hers is 0: unrelated individuals who are never heads, or 2s all who are heads;
tabulate year p_relhd if myadult_alonenokid==1 & yadult_alone==0, nolab;
tabulate year p_relhd if myadult_alonenokid==1 & yadult_alone==0, summarize(head) nolab;

*** hers is 1, mine is 0: all 11, 12, 13 for 89 forward;
*** foster kids (11), nonrelatives with relatives starting in 1994 (12) and 13 (roommate/partner);
*** Foster kids probably should not count as living alone so OK to leave those out;
*** Partner roommate likely contains some cohabs but nothing we can do about this;
tabulate year p_relhd if myadult_alonenokid==0 & yadult_alone==1, nolab;
tabulate year p_relhd if myadult_alonenokid==0 & yadult_alone==1, summarize(head) nolab;
**** here 11s/12s are family type 5 (unrelated individuals);
**** possible coding error in 12 f_type or p_relhd;
tabulate f_type if myadult_alonenokid==0 & yadult_alone==1 & p_relhd==12;
*** all are subfamilies of 1, stick with assumption 12 is error, trusting relationship to head;
tabulate sf_numpers if myadult_alonenokid==0 & yadult_alone==1 & p_relhd==12;


*** set roommate/partners (p_relhd 13, 1989 on) to be young adults living alone if they have no family;
*** the two tabs first check they have no family;
tabulate p_famrel p_relhd if myadult_alonenokid==0 & yadult_alone==1;
tabulate sf_numpers if myadult_alonenokid==0 & yadult_alone==1;
replace myadult_alonenokid = 1 if yadult==1 & p_relhd==13 & year>=1989;



***************************
*** Alone, w kids	***
***************************

*** now see what gets added if you are allowed to have your own kids;

*** Alone with kids, first pass: young adult household head whose big family is only self plus own kids;
*** temp1 is the indicator for being child of head (p_relhd 4 through 1988, p_relhd 5 from 1989 on);
gen temp1 = p_relhd==4 if year<=1988;
replace temp1 = p_relhd==5 if year>=1989;
*** numtemp1 is the number of children of the head in the HH;
egen numtemp1 = total(temp1), by(hhid);
*** alone with a kid if I'm the head and there is at least one own kid of head and;
*** the number of people in my big family is me plus own kids;
*** and no other relatives who are older;
replace myadult_alonewkid = 1 if yadult==1 & p_relhd==1 & numtemp1>0 & bf_numpers==numtemp1+1;
*** cross-tab alone-no-kid against alone-with-kid;
*** the comment line before this tab must end with the delimiter, otherwise the tab is read as comment text and does not run;
tab myadult_aloneno myadult_alonew;
** check is kids, see;
*** temp3 marks every member of a big family (within year) that contains an alone-with-kid young adult;
egen temp3 = max(myadult_alonewkid), by(bfamid year);
*** see  if most of these people who are related to head are young;
su p_age if temp1==1 & temp3==1 & p_relhd==4 & year<=1988 & myadult_alonewkid!=1;
su p_age if temp1==1 & temp3==1 & p_relhd==5 & year>=1989 & myadult_alonewkid!=1;
*** look at those older than 18;
list hhid p_age p_relhd f_type bfamid year p_famrel if temp1==1 & temp3==1 & year<=1988 & p_age>=18;
list hhid p_age p_relhd f_type bfamid year p_famrel if temp1==1 & temp3==1 & year>=1989 & p_age>=18;
*** 82 obs in CA, small;
su hhid p_age p_relhd f_type bfamid year p_famrel if temp1==1 & temp3==1 & year<=1988 & p_age>=18;
** 27 obs in CA, tiny;
su hhid p_age p_relhd f_type bfamid year p_famrel if temp1==1 & temp3==1 & year>=1989 & p_age>=18;
** mutually exclusive (yeah);
su myadult_aloneno myadult_alonew if myadult_alonewkid==1;
su myadult_aloneno myadult_alonew if myadult_alonewkid==0;
drop temp1 numtemp1 temp3;

*** Alone with kids, second pass: uses the family relationship p_famrel instead of relationship to household head;
*** NOTE(review): temp2 is only filled in for year>=1989, so in earlier years numtemp2 stays 0 and the year<=1988 replace below cannot change anything;
gen temp2=0;
** for child of family head, impose that no other relatives;
/* 
*** p_famrel is not still around for <=88 if it were would do following line for pre-88;
*** indicator for child; 
replace temp2 = 1 if p_famrel==3 & year<=1988;
*/
replace temp2 = p_famrel==3 if year>=1989;
*** any child of family in family;
*** numtemp2 is the number of such children within small family, year and household;
egen numtemp2 = total(temp2), by(sfamid year hhid);
*** alone with a kid if I'm the head of family and there is at least one own kid of head and; 
*** the number of people in my small family is me plus own kids;
*** and there is no bigger family;
replace myadult_alonewkid = 1 if yadult==1 & p_famrel==1 & numtemp2>0 & sf_numpers==numtemp2+1 & year<=1988 & bf_numpers==sf_numpers;
replace myadult_alonewkid = 1 if yadult==1 & p_famrel==1 & numtemp2>0 & sf_numpers==numtemp2+1 & year>=1989 & bf_numpers==sf_numpers;
** check is kids;
*** temp3 marks every member of a household (within year) that contains an alone-with-kid young adult;
egen temp3 = max(myadult_alonewkid), by(hhid year);

*** check on obs above age 18;
** can't be any of these;
su hhid p_age p_relhd f_type bfamid year p_famrel if temp2==1 & temp3==1 & year<=1988 & p_age>=18 & bf_numpers==sf_numpers;
*** 66 of these in CA (small);
su hhid p_age p_relhd f_type bfamid year p_famrel if temp2==1 & temp3==1 & year>=1989 & p_age>=18;
*** example ref person 27, kid is 31, partner roommate;
*** ignore for now;
list hhid p_age p_relhd f_type bfamid year p_famrel temp2 temp3 bf_numpers p_marst if hhid==2571762;
** mutually exclusive (yeah);
su myadult_aloneno myadult_alonew if myadult_alonewkid==1;
su myadult_aloneno myadult_alonew if myadult_alonewkid==0;
drop temp2 numtemp2 temp3;

***********************************
*** Married, alone, no kids	***
***********************************

*** married and alone if married and head or spouse and only 2 people in big family;
*** or unrelated subfamily and only 2 there and i am one;

*** Married, no kids: a young adult with p_marst 1 or 2 in a two-person small family that is also the whole big family;
local mnk_wed "inlist(p_marst, 1, 2)";
local mnk_pair "sf_numpers==2 & bf_numpers==sf_numpers";
*** case 1, the head (p_relhd 1) in all years;
replace myadult_marriednokid = 1 if yadult==1 & `mnk_wed' & `mnk_pair' & p_relhd==1;
*** case 2, married to the head: p_relhd 3 through 1988, p_relhd 3 or 4 from 1989 on;
replace myadult_marriednokid = 1 if yadult==1 & `mnk_wed' & `mnk_pair' & p_relhd==3 & year<=1988;
replace myadult_marriednokid = 1 if yadult==1 & `mnk_wed' & `mnk_pair' & inlist(p_relhd, 3, 4) & year>=1989;
*** mutually exclusive (yeah);
summarize myadult_aloneno myadult_alonewkid myadult_marriednokid if myadult_marriednokid==1;
summarize myadult_aloneno myadult_alonewkid myadult_marriednokid if myadult_marriednokid==0;


***********************************
*** Married, alone, w kids	***
***********************************
*** now see what gets added if you are allowed to have your own kids;

*** Married with kids, first pass: uses relationship to the household head (p_relhd);
*** indicator for child of head (p_relhd 4 through 1988, p_relhd 5 from 1989 on);
gen temp1 = p_relhd==4 if year<=1988;
replace temp1 = p_relhd==5 if year>=1989;
*** any child of head in HH;
*** numtemp1 is the number of children of the head in the household;
egen numtemp1 = total(temp1), by(hhid);
*** married with a kid if I'm the head and there is at least one own kid of head and; 
*** the number of people in my small family is own kids plus 2 and i'm married and bfam is small fam;
replace myadult_marriedwkid = 1 if yadult==1 & p_relhd==1 & numtemp1>0 & sf_numpers==numtemp1+2 & (p_marst==1|p_marst==2) & bf_numpers==sf_numpers;
*** married with a kid if I'm the spouse of head and there is at least one own kid of head and; 
*** the number of people in my small family is own kids plus 2 and i'm married;
*** spouse is p_relhd 3 through 1988 and p_relhd 3 or 4 from 1989 on;
replace myadult_marriedwkid = 1 if yadult==1 & p_relhd==3 & numtemp1>0 & sf_numpers==numtemp1+2 & (p_marst==1|p_marst==2) & year<=1988 & bf_numpers==sf_numpers;
replace myadult_marriedwkid = 1 if yadult==1 & (p_relhd==3 | p_relhd==4)  & numtemp1>0 & sf_numpers==numtemp1+2 & (p_marst==1|p_marst==2) & year>=1989 & bf_numpers==sf_numpers;
*** check age;
su p_age if numtemp1>0 & p_relhd==4 & year<=1988  & myadult_marriedwkid!=1;
su p_age if numtemp1>0 & p_relhd==5 & year>=1989 & myadult_marriedwkid!=1;
** check is kids;
*** temp3 marks every member of a small family (within year) that contains a married-with-kid young adult;
egen temp3 = max(myadult_marriedwkid), by(sfamid year);

*** 6488 obs pre 89, all married spouses of head;
su hhid p_age p_relhd f_type bfamid year p_famrel p_marst if numtemp1>0 & (p_relhd==3) & temp3==1 & p_age>=18 & sf_numpers==numtemp1+2 & (p_marst==1|p_marst==2) & year<=1988;
* 0 obs;
su hhid p_age p_relhd f_type bfamid year p_famrel p_marst if numtemp1>0 & (p_relhd==3) & temp3==1 & p_age>=18 & sf_numpers==numtemp1+2 & ~(p_marst==1|p_marst==2) & year<=1988;

*** 7566 obs post 88, all spouses of family head;
su hhid p_age p_relhd f_type bfamid year p_famrel p_marst if numtemp1>0 & (p_relhd==3 | p_relhd==4) & temp3==1 & p_age>=18 & sf_numpers==numtemp1+2 & (p_marst==1|p_marst==2) & year>=1989;
** none;
su hhid p_age p_relhd f_type bfamid year p_famrel p_marst if numtemp1>0 & (p_relhd==3 | p_relhd==4) & temp3==1 & p_age>=18 & sf_numpers==numtemp1+2 & ~(p_marst==1|p_marst==2) & year>=1989;

*** mutually exclusive  (yeah);
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid if myadult_marriedwkid==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid if myadult_marriedwkid==0;
drop temp1 numtemp1 temp3;

*** Married with kids, second pass: uses the family relationship p_famrel and only people who are not the household head;
*** NOTE(review): temp2 is only filled in for year>=1989, so in earlier years numtemp2 stays 0 and the year<=1988 replaces below cannot change anything;
gen temp2=0;
/* 
*** p_famrel is not still around for <=88 if it were would do following line for pre-88;
*** indicator for child of family head;
replace temp2 = 1 if p_famrel==3 & year<=1988;
*/
replace temp2 = p_famrel==3 if year>=1989;
*** any child of family head in small family;
*** numtemp2 is the number of such children within the small family;
egen numtemp2 = total(temp2), by(sfamid);
*** married  with a kid if I'm the head of family and there is at least one own kid of head and; 
*** the number of people in my small family is own kids plus 2 and i'm married;
replace myadult_marriedwkid = 1 if yadult==1 & p_famrel==1 & numtemp2>0 & sf_numpers==numtemp2+2 & (p_marst==1|p_marst==2) & year<=1988 & head!=1 & bf_numpers==sf_numpers;
replace myadult_marriedwkid = 1 if yadult==1 & p_famrel==1 & numtemp2>0 & sf_numpers==numtemp2+2 & (p_marst==1|p_marst==2) & year>=1989 & head!=1 & bf_numpers==sf_numpers;

*** married  with a kid if I'm married to the head and there is at least one own kid of head and; 
*** the number of people in my small family is own kids plus 2 and I'm married;
replace myadult_marriedwkid = 1 if yadult==1 & p_famrel==2 & numtemp2>0 & sf_numpers==numtemp2+2 & (p_marst==1|p_marst==2) & year<=1988 & head!=1 & bf_numpers==sf_numpers;
replace myadult_marriedwkid = 1 if yadult==1 & p_famrel==2 & numtemp2>0 & sf_numpers==numtemp2+2 & (p_marst==1|p_marst==2) & year>=1989 & head!=1 & bf_numpers==sf_numpers;

** check is kids;
*** temp3 marks every member of a small family (within year) that contains a married-with-kid young adult;
egen temp3 = max(myadult_marriedwkid), by(sfamid year);
*** check age;
su p_age if numtemp2>0 & temp3==1 & myadult_marriedwkid!=1;

*** first look at family heads;
*** check those above 18, none pre88;
su p_famrel numtemp2 sf_numpers p_marst if yadult==1 & p_famrel==1 & temp3!=0 &  numtemp2>0 & sf_numpers==numtemp2+2 & (p_marst==1|p_marst==2) & year<=1988 & head!=1 & myadult_marriedwkid!=1 & p_age>=18;
** none post 88;
su p_famrel numtemp2 sf_numpers p_marst if yadult==1 & p_famrel==1 & temp3!=0 &  numtemp2>0 & sf_numpers==numtemp2+2 & (p_marst==1|p_marst==2) & year>=1989 & head!=1 & myadult_marriedwkid!=1 & p_age>=18;

*** next look at family spouses;
*** check those above 18, none pre88;
su p_famrel numtemp2 sf_numpers p_marst if yadult==1 & p_famrel==2 & temp3!=0 &  numtemp2>0 & sf_numpers==numtemp2+2 & (p_marst==1|p_marst==2) & year<=1988 & head!=1 & myadult_marriedwkid!=1 & p_age>=18;
** none post 88;
su p_famrel numtemp2 sf_numpers p_marst if yadult==1 & p_famrel==2 & temp3!=0 &  numtemp2>0 & sf_numpers==numtemp2+2 & (p_marst==1|p_marst==2) & year>=1989 & head!=1 & myadult_marriedwkid!=1 & p_age>=18;


*** mutually exclusive  (yeah);
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid if myadult_marriedwkid==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid if myadult_marriedwkid==0;
drop temp2 numtemp2 temp3;


********************************************************************
*** Alone married or unmarried w/wout kids suggested new variable **
*** to replace yadult_alone    	      	   	     	 	  **
********************************************************************;

*** myadult_aloneall adds up the four alone categories (unmarried or married, without or with kids);
*** the tab shows its weighted mean by year, using p_marwt as analytic weight;
generate myadult_aloneall = myadult_alonenokid
	+ myadult_alonewkid
	+ myadult_marriednokid
	+ myadult_marriedwkid;
tabulate year [aweight=p_marwt], summarize(myadult_aloneall);


***********************************
*** unmarried with parents WHAT DO W MARRIED & PARENTS?	***
***********************************
**** parent is householder;
*** Young adult living with parents;
*** parent is householder: child of head is p_relhd 4 through 1988 and p_relhd 5 from 1989 on;
replace myadult_par = 1 if yadult==1 & p_relhd==4 & year<=1988;
replace myadult_par = 1 if yadult==1 & p_relhd==5 & year>=1989;
***** foster kid, 89 on only;
replace myadult_par = 1 if yadult==1 & p_relhd==11 & year>=1989;
*** child of the family head (p_famrel 3), 89 on only;
*** the cut was written as year>=1988, which treated 1988 differently from every other pre-89 year and from all other 1988/1989 splits in this section;
*** NOTE(review): the pre-89 version below was switched off with the remark not in my data - confirm whether p_famrel can be used for year<=1988 here;
*** replace myadult_par = 1 if yadult==1 & p_famrel==3 & year<=1988;
replace myadult_par = 1 if yadult==1 & p_famrel==3 & year>=1989;

*** can't do in pre-89, head yadult and parent in hh;
*** headyadult is 1 for young adult heads from 1989 on and missing otherwise, h_headyadult counts them per household;
gen headyadult=1 if p_relhd==1 & yadult==1 & year>=1989;
egen h_headyadult = total(headyadult), by(hhid);
tab h_headyadult;

*** temp1 marks p_relhd 8 persons (taken here to be parents of the head) in a household with exactly one young adult head;
gen temp1 = h_headyadult==1 & p_relhd==8 & year>=1989;
*** do within big family;
bysort bfamid: egen temp2=total(temp1);
*** are parents of young head present in bigfamily?;
replace myadult_par = 1 if headyadult==1 & temp2>0 & year>=1989;

*** check age of the flagged parents;
*** temp1 is a 0/1 dummy that is never missing, so the condition has to be temp1==1 to restrict the summary to them;
su p_age if temp1==1;
*** not exactly mutually exclusive with;
*** myadult_marriedwkid (1 fam in 94 with problems with CA);
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_par==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_par==0;


*** 1 ob in CA;
tab bf_numpers if myadult_par==1 & myadult_marriedwkid==1;
** prioritize p_relhd;
list hhid  p_relhd bf_numpers if myadult_par==1 & myadult_marriedwkid==1;
*** case, hhid=1349345 one of p_relhd==5, but bfamid is different in 1994;
list hhid p_age p_relhd f_type bfamid year p_famrel p_marst sfamid bfamid myadult_marriedwkid myadult_par bf_numpers sf_numpers if hhid==1349345, nolab;
*** look into later, for now, turn off prioritize bfamid;
replace myadult_par=0 if myadult_marriedwkid==1 & myadult_par==1;

*** now mutually exclusive;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_par==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_par==0;



*** head of sfam and Yadult;
*** sfheadyadult flags young adult small family heads, sf_headyadult counts them within the small family;
gen sfheadyadult = yadult==1 & sfhead == 1;
bysort sfamid: egen sf_headyadult=total(sfheadyadult);
drop temp2  temp1;


***********************************
*** unmarried with relatives WHAT DO W MARRIED w rel?	***
*** copy ELIRA
***********************************
* Young adult living with other relatives;
*** the young adult is relative of the sf head (p_famrel 4);
replace myadult_rel = 1 if yadult==1 & p_famrel==4;
*young adult is sibling or other relative of the household head;
*p_relhd 5 through 1988, p_relhd 7, 9 or 10 from 1989 on;
replace myadult_rel = 1 if yadult==1 & p_relhd==5 & year<=1988; 
replace myadult_rel = 1 if yadult==1 & (p_relhd==7 | p_relhd==9 | p_relhd==10) & year>=1989; 

*** temp1 marks relatives (p_famrel 4) in a small family with exactly one young adult head, temp2 counts them within the small family;
gen temp1 = sf_headyadult==1 & p_famrel==4; 
bysort sfamid: egen temp2=total(temp1);
tab f_kind temp2 if year<=1988, missing;
tab f_type temp2 if year>=1989, missing;
*** are relatives of young sf head present in sf? (only for sf heads who are not the household head);
replace myadult_rel = 1 if sfheadyadult==1 & temp2>0 & head!=1;
*** check age of the flagged relatives;
*** temp1 is a 0/1 dummy that is never missing, so the condition has to be temp1==1 to restrict the summary to them;
su p_age if temp1==1;
*** not mutually exclusive with alonewkid, 3 obs in CA;

su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_rel==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_rel==0;
*** not mutually exclusive with alonewkid, 3 obs in CA;
tab myadult_alonewkid myadult_rel;
list hhid  if myadult_alonewkid==1 &  myadult_rel==1;
*** example again, big family id suggests not related but the relhd says are, go with bfamid;
list hhid p_age p_relhd f_type bfamid year p_famrel p_marst sfamid bfamid myadult_alonewkid myadult_rel bf_numpers sf_numpers if hhid==1319659, nolab;
** go with bfamid;
replace myadult_rel=0 if myadult_alonewkid==1;

*** not mutually exclusive with married wkid, 6 obs in ca;
list hhid  if myadult_marriedwkid==1 &  myadult_rel==1;
*** example again, big family id suggests not related but the relhd says are, go with bfamid;
list hhid p_age p_relhd f_type bfamid year p_famrel p_marst sfamid bfamid myadult_alonewkid myadult_rel bf_numpers sf_numpers if hhid==1348679, nolab;
** go with bfamid;
replace myadult_rel=0 if myadult_marriedwkid==1;
*** not mutually exclusive with par, 7 obs in ca, all natural adopted child;
tab year if myadult_par==1 & myadult_rel==1; 
tab p_relhd if myadult_par==1 & myadult_rel==1 & year>=1989;
list hhid  if myadult_par==1 &  myadult_rel==1;
*** example, here parent is 24, people related to parent are 18 and 25, go with relative;
*** later maybe impose year difference with parent;
list hhid p_age p_relhd f_type bfamid year p_famrel p_marst sfamid bfamid myadult_par myadult_rel bf_numpers sf_numpers if hhid==1916069, nolab;
*** go with relative;
replace myadult_par=0 if myadult_rel==1;

su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par myadult_rel if myadult_rel==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par myadult_rel if myadult_rel==0;

drop temp1 temp2;


*** Repeat of the relatives-of-young-sf-head step, applied to the EK variable yadult_rel (not myadult_rel);
*** NOTE(review): temp1 and temp2 are not dropped in this part of the file - confirm they are cleaned up later;
*** temp1 marks relatives (p_famrel 4) in a small family with exactly one young adult head, temp2 counts them within the small family;
gen temp1 = sf_headyadult==1 & p_famrel==4;
bysort sfamid: egen temp2=total(temp1);
tab f_kind temp2, missing;
*** are relatives of young sf head present in sf?;
replace yadult_rel = 1 if sfheadyadult==1 & temp2>0 & yadult==1;
*** check age of the flagged relatives;
*** temp1 is a 0/1 dummy that is never missing, so the condition has to be temp1==1 to restrict the summary to them;
su p_age if temp1==1;

su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par myadult_rel if myadult_rel==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par myadult_rel if myadult_rel==0;


***********************************
*** check not any duplication *****
***********************************;

*** Cross-tab every myadult_ category against each of the other five to look for overlap;
*** the outer loop gives the row variable and the inner loop the column variable, skipping the category itself;
local ma_cats alonenokid alonewkid marriednokid marriedwkid par rel;
foreach rowcat of local ma_cats {;
	foreach colcat of local ma_cats {;
		if "`rowcat'" != "`colcat'" {;
			tab myadult_`rowcat' myadult_`colcat';
		};
	};
};


*** tab parent line, alone no kid, mutually exclusive;
*** parent line, alone wkid 10 obs in  CA;
*** married no kid mutually exclusive;
*** married w kid 2 obs in 2 years in CA;
*** bunches of obs with parent line not missing and live with par not 1;
*** some with parent line not missing and live with other relative;
*DC notes: no p_parentline variable for 80-88;
*for any alonenok alonewkid marriednokid marriedwkid par rel: tab p_parentline myadult_X;
*** look into live with parent issue; 
*** small number of spouses, a bunch of relatives of head who are reference persons, but bulk are brother/sister of head;
*** but parent is in house;
*tab p_relhd p_famrel if p_parentline!=0 & p_parentline<.  & myadult_par==0;

*** if brother/sister of head and that person has a parent, will identify that person as living with relatives;
*tab p_relhd myadult_rel if p_parentline!=0 & myadult_par==0 & yadult==1 & year>=1989;

*DC ends here;


****************************************;
** Doubling up/Female head	       *;
****************************************;
** only household concepts;

** Concept 1, a related subfamily (f_kind 2) with a kid;
** person flag is under18 within a related subfamily, the household flag is its max within hhid;
gen kid_relsf = (f_kind==2) * under18;
tab f_kind kid_relsf;
egen h_relsubfwkid = max(kid_relsf), by(hhid);
tab h_relsubfwkid kid_relsf if f_kind==2;
drop kid_relsf;
** show can't get this unless there is a family for the householder;
*** that is, h_relsubfwkid should be 0 where f_kind is 4;
tab h_relsubfwkid f_kind;
label var h_relsubfwkid "Related subfamily with a kid in HH";

** concept 2, any subfamily (f_kind 2 or 3) with a kid;
gen kid_anysf = inlist(f_kind, 2, 3) * under18;
tab f_kind kid_anysf;
egen h_anysubfwkid = max(kid_anysf), by(hhid);
tab h_anysubfwkid kid_anysf if f_kind==2;
drop kid_anysf;
*** could get this with unrelated individual HHER;
tab h_anysubfwkid f_kind if h_relsubfwkid==0;
label var h_anysubfwkid "Any subfamily with a kid in the HH";

** concept 3, any subfamily with a kid and primary family (f_kind 1) has a kid;
** the household flag is the product of the primary-family flag and the concept 2 flag;
gen kid_primary = (f_kind==1) * under18;
tab f_kind kid_primary;
egen h_primarywkid = max(kid_primary), by(hhid);
gen h_anypfsubfwkid = h_anysubfwkid * h_primarywkid;
tab h_anypfsubfwkid h_primarywkid;
drop kid_primary h_primarywkid;
label var h_anypfsubfwkid "Primary and subfamily both have a kid in HH";

** concept 4, HH, more than 1 family and at least 1 kid;
** h_numsf counts subfamily heads (f_kind 2 or 3) and h_anypf counts primary family heads within the household;
egen h_numsf = total(inlist(f_kind, 2, 3) & sfhead==1), by(hhid);
egen h_anypf = total(f_kind==1 & head==1), by(hhid);
*** fixed per Hilary comment: h_anykidinfam counts kids under 18 who are in a family (f_kind 1, 2 or 3);
egen h_anykidinfam = total(inlist(f_kind, 1, 2, 3) & under18==1), by(hhid);
*** fixed per Hilary comment: need at least two family units and at least one kid in a family;
gen famunits = h_numsf + h_anypf;
gen h_gt1sfamwkid = (famunits>=2) & h_anykidinfam>=1;
tab h_gt1sfamwkid h_numsf, missing;
tab h_gt1sfamwkid h_anypf, missing;
tab h_gt1sfamwkid h_anykidinfam;
*** fixed per Hilary comment: check the flag against the number of family units, with and without kids;
tab h_gt1sfamwkid famunits if h_anykidinfam >=1, missing;
tab h_gt1sfamwkid famunits if h_anykidinfam ==0, missing;
drop h_anypf h_numsf famunits h_anykidinfam;
label var h_gt1sfamwkid "More than 1 family unit (Census def) and at least 1 has a kid";


** concept 5, HH, with any small family with a female head and a kid;
** Hilary wants to also have it be an unmarried woman, include separated;
*** MB also imposes, diff in ages must be 14 or larger;
*** reference person for a family and female and *** Hilary addition **** unmarried/separated;
*** also MB adds NW/lowed;
*** 10/10 adding indicator for woman who is small family female unmarried/separated head with a kid;

*** Candidate woman: small family head, female, not f_kind 4 or 5, p_marst between 5 and 8;
*** three variants: everyone, nw (white!=1, not a white non-hispanic), led (lths or hsgrad);
*** each variant gets a 0/1 flag and the age of the woman (missing for everyone else);
local fh_all "sfhead==1 & p_male==0 & f_kind!=4 & f_kind!=5 & p_marst>=5 & p_marst<=8";
local fh_nw  "`fh_all' & white!=1";
local fh_led "`fh_all' & (lths==1 | hsgrad==1)";
gen tmp       = `fh_all';
gen tmpage    = p_age if `fh_all';
gen tmpnw     = `fh_nw';
gen tmpagenw  = p_age if `fh_nw';
gen tmpled    = `fh_led';
gen tmpageled = p_age if `fh_led';

*** spread each flag and age over the whole small family (max within sfamid);
foreach v in tmp tmpnw tmpled tmpage tmpagenw tmpageled {;
	egen sf`v' = max(`v'), by(sfamid);
};

*** some other kid in this same family: not the sf head, under 18, not f_kind 4 or 5;
*** This may not impose that it's my kid, but hopefully it will for most;
*** sftmpage2 is the age of the oldest such kid in the small family;
local fh_kid "sfhead!=1 & p_age<18 & f_kind!=4 & f_kind!=5";
gen tmp2    = `fh_kid';
gen tmpage2 = p_age if `fh_kid';
egen sftmp2    = max(tmp2), by(sfamid);
egen sftmpage2 = max(tmpage2), by(sfamid);


*** see how often it might be age inappropriate to be my kid by means;
*** the diffage variables are the age of the woman minus the age of the oldest kid;
summarize p_age if tmp==1 & sftmp2==1;
summarize p_age if tmp2==1 & sftmp==1;
gen sftmpdiffage    = sftmpage - sftmpage2;
gen sftmpdiffagenw  = sftmpagenw - sftmpage2;
gen sftmpdiffageled = sftmpageled - sftmpage2;
summarize sftmpdiffage*;

*** new 10/10;
*** woman herself is a female sfhead with a kid in her small family and an age gap of at least 14;
*** 0 for everyone else, including when the age gap is missing;
gen femsfhead = tmp==1 & sftmp2==1 & sftmpdiffage>=14 & sftmpdiffage<.;
tabulate femsfhead f_kind, missing;
tabulate p_age femsfhead, missing;
tabulate sftmpdiffage if femsfhead==1;
tabulate p_relhd femsfhead, missing;

** HH contains a female sfhead;
** impose the constraint at the small family level (woman present, kid present, age gap at least 14 and not missing);
** then take the max within the household for each of the three variants;
gen tmp3    = sftmp==1    & sftmp2==1 & sftmpdiffage>=14    & sftmpdiffage<.;
gen tmp3nw  = sftmpnw==1  & sftmp2==1 & sftmpdiffagenw>=14  & sftmpdiffagenw<.;
gen tmp3led = sftmpled==1 & sftmp2==1 & sftmpdiffageled>=14 & sftmpdiffageled<.;
egen h_anyfemsfhead    = max(tmp3), by(hhid);
egen h_anynwfemsfhead  = max(tmp3nw), by(hhid);
egen h_anyledfemsfhead = max(tmp3led), by(hhid);


*** new 10/10;
*** for comparison with femsfhead;
*** married woman in a family with a child at least 14 years younger;
*** need not be a head;
*** Flag married women (p_marst<=4) aged 15 or more who are in a family unit (f_kind not 4 or 5);
gen tmpmar = p_male==0 & f_kind!=4 & f_kind!=5 & p_marst<=4 & p_age>=15;
*** Age of those same women, missing for everybody else;
gen tmpagemar = p_age if p_male==0 & f_kind!=4 & f_kind!=5 & p_marst<=4 & p_age>=15;
*** Spread the flag and the oldest such age over every member of the small family;
egen sftmpmar = max(tmpmar), by(sfamid);
egen sftmpagemar = max(tmpagemar), by(sfamid);
*** Age gap between that woman and the oldest non-head kid under 18 in the small family;
gen sftmpmardiffage = sftmpagemar -sftmpage2;
*** Woman-level indicator: married, a kid in her small family, and a gap of at least 14 years;
gen femsfmar = 1 if tmpmar==1 & sftmp2==1 & sftmpmardiffage>=14 & sftmpmardiffage<.;
replace femsfmar=0 if femsfmar==.;
*** Diagnostics mirror the femsfhead ones but use the married-woman variables;
tab femsfmar f_kind, missing;
tab p_age femsfmar, missing;
tab sftmpmardiffage if femsfmar==1;
tab p_relhd femsfmar, missing;

*** could get same kid as both;
*** hope not often;
*** can't get same woman;
tab femsfhead femsfmar;
tab sftmpmar sftmp if sftmp2==1 & sftmpmardiffage>=14 & sftmpdiffage>=14;
tab tmp2 if sftmp==1 & sftmpmar==1 & sftmpmardiffage>=14 & sftmpdiffage>=14;
*pause;

label variable femsfhead "Woman is unmarried/separated small family head with one kid under 18";
label variable femsfmar "Woman is married in a small familywith one kid under 18";
label variable h_anyfemsfhead "HH has at least one small family with an unmarried/separated female head and at least one other kid under 18";
label variable h_anynwfemsfhead "HH has at least one small family with a non-white or Hispanic unmarried/separated female head and at least one other kid under 18";
label variable h_anyledfemsfhead "HH has at least one small family with a unmarried/separatedfemale head with <=12 years ed and at least one other kid under 18";
tab h_anyfemsfhead sftmp;
tab h_anyfemsfhead sftmp2;
tab sftmpdiffage h_anyfemsfhead;
tab h_anyfemsfhead tmp3;

*** Note in next tab, there can be 1s in any cell, as the qualifying small family could be another SF in my HH;
*** but vast bulk should be in 1/1;
tab sftmp sftmp2, su(h_anyfemsfhead);
tab h_anyfemsfhead sfhead if p_male==0;
tab h_anyfemsfhead sfhead if p_male==1;
drop tmp tmp2 tmp3 sftmp sftmp2 sftmpage sftmpage2 sftmpdiffage;
drop tmpnw tmp3nw sftmpnw sftmpagenw sftmpdiffagenw;
drop tmpled tmp3led sftmpled sftmpageled sftmpdiffageled;

** check means;
su h_relsubfwkid h_anysubfwkid h_anypfsubfwkid h_gt1sfamwkid h_anyfemsfhead;
for any relsubfwkid anysubfwkid anypfsubfwkid gt1sfamwkid anyfemsfhead anynwfemsfhead anyledfemsfhead: tab year h_X;


*** check how many HH with an unrelated individual kid or a householder kid;
*** in CA 0.59% of records;
gen tmp = p_age<18 & (f_kind==4 | f_kind==5);
egen htmp = max(tmp), by(hhid);
tab htmp;

** HH with some family unit and a kid;
** In CA 61%;
gen tmp2 = p_age<18 & (f_kind<=3);
egen htmp2 = max(tmp2), by(hhid);
tab htmp2;

*** how many with an unrelated individual kid or hher kid and no other kid;
*** about half;
tab htmp htmp2;

*** check if any of these are our measures of interest;
for any relsubfwkid anysubfwkid anypfsubfwkid gt1sfamwkid anyfemsfhead: tab htmp h_X if htmp2==1;
for any relsubfwkid anysubfwkid anypfsubfwkid gt1sfamwkid anyfemsfhead: tab htmp h_X if htmp2==0;

*** drop;
drop htmp* tmp*;
**pause on;
**pause;

**********************************;
* Multiple program participation *;
**********************************;
* No WIC;

*** Household-level indicators for combinations of program participation;
*** Program flags are tested with ==1, as in the h_safty lines, because a bare logical;
*** expression treats a missing or any other nonzero code as true;
gen h_anyfoodtrans = h_foodstp==1 | h_freelun==1;
*** NOTE(review): r_h_inc_pa>0 and r_h_inc_sp>0 are also true when the income variable is missing - confirm they never are;
gen h_fswelf = (r_h_inc_pa>0 & h_foodstp==1);
gen h_fswelfssi = ((r_h_inc_pa>0|r_h_inc_sp>0) & h_foodstp==1);
*** Any safety-net program, without and with Medicaid;
gen h_saftynocaid  = h_foodstp==1 | h_freelun==1 | (r_h_inc_pa>0) | r_h_inc_sp>0 | h_public==1 | h_rentsub==1 |h_enrgyas==1;
gen h_saftywcaid   = h_foodstp==1 | h_freelun==1 | (r_h_inc_pa>0) | r_h_inc_sp>0 | h_public==1 | h_rentsub==1| h_enrgyas==1| h_anymedicaid==1;

*** Public housing or rent subsidy;
gen h_pubsec8 = h_public==1 | h_rentsub==1;

*** Yearly means of each new indicator as a check;
for any h_anyfoodtrans h_fswelf h_fswelfssi  h_saftynocaid h_saftywcaid h_pubsec8: tab year, su(X);


***********************************************************************;
*Education distribution, Emp-pop ratios, hours, and earnings by group *;
***********************************************************************;
gen employed = p_weeks>0; 
gen fulltime = p_weeks>=50 & p_ushrs>=35; 
gen annualhrs = p_ushrs*p_weeks;

*** MB;
** check;
tab year, su(employed);
su employed fulltime annualhrs [aw=p_marwt] , d;
*** MB end;

foreach n in employed fulltime annualhrs r_p_inc_ws {;
  gen `n'_lh = `n' if lths==1;
  gen `n'_hg = `n' if hsgrad==1;
  gen `n'_sc = `n' if somecol==1;
  gen `n'_cg = `n' if colgrad==1;
};

egen skillgroup = group(edcat p_sex white);
gen hrwage = r_p_inc_ws/annualhrs;
gen grouppop = 1;
gen numobs = 1;


**** Removed sample selection;

*** comment out;
*Puerto Rico*;
drop if statefip==43;

qui compress;

*** mb took out temp file;
***tempfile temp1;
*pause on;
*pause;
*** added drop statements;
*** takes to 1.7 meg;
*** take out h_pos h_seq p_pos;
*EK took out p_famrel_sec from the drop list because HH asked so;
rename p_famrel_sec p_famrel;	/*EK noticed this variables is the same as the post88 p_famrel variable*/;
drop h_ppind h_subnum h_division f_seq f_headinx f_fl_* f_recode f_childu18 f_rec_inc* 
	f_lowinc p_seq p_princ p_fammem p_subfammem p_popstat p_ethnicity p_higrade 
	p_gradecom p_rec* p_cow2 p_inclingh p_paidgh p_whoelsgh p_have* p_whoelshi p_hiown division 
	jshead t15plus minp_posif15 *elderly countbigf ak hi calyear fpl* incyear p_othhi  p_bwt;

** some other recodes;
** household;
gen h_lths= h_edcat==1;
gen h_hsgrad= h_edcat==2;
gen h_somecol = h_edcat==3;
gen h_colgrad=h_edcat==4;
gen t = h_lths+h_hsgrad + h_somecol + h_colgrad;
** 158 0s;
tab t, missing;
tab t noheadhh;
drop t;


gen h_agele24 = h_age<=24;
gen h_age2534 = h_age>=25 & h_age<=34;
gen h_age3544 = h_age>=35 & h_age<=44;
gen h_age4564 = h_age>=45 & h_age<=64;
gen h_age65plus = h_age>=65 & h_age<.;
gen t = h_agele24 + h_age2534 + h_age3544 + h_age4564 + h_age65plus;
tab t;
drop t;

gen h_nevmar = h_marst ==7 if year>=1989;
gen h_sepwiddiv = h_marst>=4 & h_marst<=6 if year>=1989;
gen h_married = h_marst<=3 if year>=1989;
replace h_nevmar = h_marst ==8 if year<=1988;
replace h_sepwiddiv = h_marst>=5 & h_marst<=7 if year<=1988;
replace h_married = h_marst<=4 if year<=1988;
gen t= h_nevmar + h_sepwiddiv + h_married;
tab t;
drop t;

**** fix h_wgt p_marwt, other weights for implied decimal places 1988 and earlier;
**** Note h_wgt, sf_wgt, bf_wgt, and cf_wgt are weights of relevant family/HH head;
**** h_weight is CPS HH weight;
**** p_marwt is march CPS person weight, and p_bwt is basic CPS weight;
**** f_famwgt is CPS family weight;
**** p_bwt is not in data after 1988;
**** CF* dropped;
for any h_weight p_marwt f_famwgt h_wgt sf_wgt bf_wgt : replace X = X/100 if year<=1988;
*** Labels for the CPS weights and for the weights taken from our HH/family heads;
label variable h_weight "CPS household weight";
label variable p_marwt "CPS march person weight";
*label variable p_bwt "CPS basic monthly weight, not in after 1988";
label variable f_famwgt "CPS family weight";
label variable h_wgt "March weight of our HH head";
label variable sf_wgt "March weight of our small family head";
label variable bf_wgt "March weight of our big family head";
*label variable cf_wgt "March weight of Census family head";


global demog = "h_lths h_hsgrad h_somecol h_colgrad h_male h_white h_hisp h_hispdkrf h_black h_other  h_age  h_agele24 h_age2534 h_age3544 h_age4564 h_age65plus h_nevmar h_sepwiddiv h_married";
su $demog if head==1 & hprobthreshold!=1 [aw=h_wgt];
label variable h_lths "HH head HS dropout";
label variable h_hsgrad "HH head HS graduate";
label variable h_somecol "HH head has some college";
label variable h_colgrad "HH head is college graduate";
label variable h_male "HH head is male";
label variable h_white "HH head is white non-Hispanic";
label variable h_hisp "HH head is Hispanic";
label variable h_hispdkrf "HH head DK/RF Hispanic";
label variable h_black "HH head is black non-Hispanic";
label variable h_other "HH head is other non-Hispanic race";
label variable h_age "HH head age";
label variable h_agele24 "HH head is 24 or under";
label variable h_age2534 "HH head is 25-34";
label variable h_age3544 "HH head is 35-44";
label variable h_age4564 "HH head is 45-64";
label variable h_age65plus "HH head is 65 plus";
label variable h_nevmar "HH head is never married";
label variable h_sepwiddiv "HH head is sep/wid/div";
label variable h_married "HH head is married now";

for any $demog : tab year  if head==1 & hprobthreshold!=1 [aw=h_wgt], su(X);

*** recipiency;
*** vet = uc + wc + vet;
*** asset is rnt + div + int;
*** cs is CSP + ALM  + other after 1989;
cap rename r_h_inc_cs r_h_inc_othpr;
for any ws se fr vet asset othpr sp ss pa dis ret earn oth : gen h_anyinc_X = r_h_inc_X !=0;
 label variable h_anyinc_ws "HH income from wage/salary ";
 label variable h_anyinc_se "HH income from self-employment";
 label variable h_anyinc_fr "HH income from farm";
 label variable h_anyinc_vet "HH income from WC/UI/veteran's";
 label variable h_anyinc_asset "HH income from rent/dividends/interest";
 label variable h_anyinc_othpr "HH income from child support/alimony/other";
 label variable h_anyinc_sp "HH income from SSI";
 label variable h_anyinc_ss "HH income from social security";
 label variable h_anyinc_pa "HH income from public assistance";
 label variable h_anyinc_dis "HH income from disability";
 label variable h_anyinc_ret "HH income from retirement";
 label variable h_anyinc_earn "HH income from earnings";
 label variable h_anyinc_oth "HH income from non-earnings";

 label variable r_h_inc_tot "HH total money income in 2009 $";
 label variable r_h_inc_ws "HH income from wage/salary  in 2009 $";
 label variable r_h_inc_se "HH income from self-employment in 2009 $";
 label variable r_h_inc_fr "HH income from farm in 2009 $";
 label variable r_h_inc_vet "HH income from WC/UI/veteran's in 2009 $";
 label variable r_h_inc_asset "HH income from rent/dividends/interest in 2009 $";
 label variable r_h_inc_othpr "HH income from child support/alimony/other in 2009 $";
 label variable r_h_inc_sp "HH income from SSI in 2009 $";
 label variable r_h_inc_ss "HH income from social security in 2009 $";
 label variable r_h_inc_pa "HH income from public assistance in 2009 $";
 label variable r_h_inc_dis "HH income from disability in 2009 $";
 label variable r_h_inc_ret "HH income from retirement in 2009 $";
 label variable r_h_inc_earn "HH income from earnings in 2009 $";
 label variable r_h_inc_oth "HH income from non-earnings in 2009 $";

*** some of these are not in the data for earlier period, take them out;
for any h_hfairpoor h_hexcgood h_wicyn h_tanftranscc h_tanftred : gen X=. ;
global outcomes "hpovlt50 hpovlt150 hpov50100 hbelowpov hpov100200 halt1povlt50 halt1povlt150 halt1belowpov halt2povlt50 halt2povlt150 halt2belowpov h_anymedicaid h_anypublichi h_anyanyhi1 h_anyanyhi2 h_anynohi1 h_anynohi2 h_anykidpub h_anykidanyhi1 h_anykidanyhi2 h_anykidnohi1 h_anykidnohi2 h_own h_public h_rentsub h_foodstp h_freelun h_enrgyas h_relsubfwkid h_anysubfwkid h_anypfsubfwkid h_gt1sfamwkid h_anyfemsfhead h_anynwfemsfhead h_anyledfemsfhead r_h_inc_ws r_h_inc_pa r_h_inc_sp h_hfairpoor h_hexcgood h_anyinc_ws h_anyinc_se h_anyinc_fr h_anyinc_vet h_anyinc_asset  h_anyinc_othpr  h_anyinc_sp h_anyinc_ss h_anyinc_pa h_anyinc_dis h_anyinc_ret h_anyinc_earn h_anyinc_oth h_wicyn h_tanftranscc h_tanftred h_pubsec8 h_fswelf h_fswelfssi h_saftynocaid h_saftywcaid";
su $outcomes if head==1 & hprobthreshold!=1 [aw=h_wgt];
*** Labels for the poverty-ratio indicators, one label per variable;
label variable hpovlt50 "HH income <50% of poverty threshold";
label variable hpovlt150 "HH income <150% of poverty threshold";
label variable hpov50100 "HH income in 50-100% of poverty threshold";
label variable hbelowpov "HH income <100% of poverty threshold";
label variable hpov100200 "HH income in 100-200% of poverty threshold";
label variable h_anymedicaid "Someone in HH on Medicaid";
label variable h_anypublichi "Someone in HH on Medicaid/Medicare/Military/CHIP post 01";
label variable h_anyanyhi1 "Someone in HH on private/Medicaid/Medicare/Military/CHIP 01 HI";
label variable h_anyanyhi2 "Someone in HH on group HI/Medicaid/Medicare/Military/CHIP 01 HI";
label variable h_anynohi1 "Someone in HH without HI anyhi1 concept ";
label variable h_anynohi2 "Someone in HH  without HI anyhi2 concept";
label variable h_anykidpub "Kid in HH on Medicaid/Medicare/Military/CHIP post 01";
label variable h_anykidanyhi1 "Kid in HH on private/Medicaid/Medicare/Military/CHIP 01 HI ";
label variable h_anykidanyhi2 "Kid in HH on group/Medicaid/Medicare/Military/CHIP 01 HI ";
label variable h_anykidnohi1 "Kid in HH without HI anyhi1 concept";
label variable h_anykidnohi2 "Kid in HH without HI anyhi2 concept";
label variable h_own "HH dwelling owned";
label variable h_public "HH in public housing ";
label variable h_rentsub "HH got rent subsidy from government";
label variable h_foodstp "Someone in HH on Food Stamps last year ";
label variable r_h_fsval "Value of HH Food Stamps in 2009 $";
label variable h_freelun "Some kid 5-18 in HH on free/reduced school lunch";
label variable h_enrgyas "Someone in HH got LIHEAP";
label variable r_h_enrgyva "Value of HH LIHEAP Oct - Dec, in 2009$";
label variable r_h_inc_ws "HH wage/salary income ";
label variable r_h_inc_pa "HH public assistance income";
label variable r_h_inc_sp "HH SSI income";
label variable h_hfairpoor "HH had someone with fair/poor health";
label variable h_hexcgood "HH had someone with good/excellent health";
label variable h_wicyn "Someone in HH on WIC ";
label variable h_tanftranscc "Someone in HH got transportation ass. or did community service (non-cash TANF) ";
label variable h_tanftred "Someone in HH got job training/readiness/club, or went to GED classes (non-cash TANF)";
label variable h_pubsec8 "HH is in public housing or has subsidized rent";
label variable h_fswelf "Someone in HH on FS and cash welfare ";
label variable h_fswelfssi "Someone in HH on FS and either cash welfare or SSI ";
label variable h_saftynocaid "Someone in HH on FS/School lunch/cash welf/SSI/public housing/section 8/LIHEAP";
label variable h_saftywcaid "Someone in HH on FS/School lunch/cash welf/SSI/public housing/section 8/LIHEAP/Medicaid";
label variable halt1povlt50 "HH alt1 income <50% of poverty threshold";
label variable halt1povlt150 "HH alt1 income <150% of poverty threshold";
label variable halt1belowpov "HH alt1 income <100% of poverty threshold";
label variable halt2povlt50 "HH alt2 income <50% of poverty threshold";
label variable halt2povlt150 "HH alt2 income <150% of poverty threshold";
label variable halt2belowpov "HH alt2 income <100% of poverty threshold";

*** name file saving to because of memory issues;
*** debugging;
**  save small8088_recode, replace;
save marcps8088_recode, replace;

*/
*DC adds 1988B;
*DC adds 2014R;

!date;
**************************************************;
*1988B-2015                                       *;
**************************************************;
#delimit ;
clear;

* for debugging just use CA or AL;
** use /home/research/cycles/march/marcps8914 if statefip==1;
**Zuse /data/brook/cycles/march/marcps8914 if statefip==1;
use marcps88b17.dta;
*//deal with recode;
** Z use /data/brook/cycles/march/marcps8914.dta;
tempfile marcps;
save `marcps', replace;

*DC ends;

*** already in data;
rename h_numpers h_numpers_cps;

** MB comment out;
** drop f_*;
** MB end;

egen hhid = group(year h_seq);
*** to get nobs;
su hhid;

** MB to check with totals;
tab year, su(hhid);

drop if statefip==0;
label variable statefip "State FIPS code";
*This labels state and MSA values*;
do label; 

*Add Oakland and SF Together*;
replace h_smsa = 7360 if h_smsa==5775 & year>=1986;
*Add Boulder to Denver*;
replace h_smsa = 2080 if h_smsa==1125 & year>=1986;

*** Census division (1 to 9) from the state FIPS code, any state not listed stays missing;
gen division = .;
replace division = 1 if inlist(statefip, 9, 23, 25, 33, 44, 50);
replace division = 2 if inlist(statefip, 34, 36, 42);
replace division = 3 if inlist(statefip, 17, 18, 26, 39, 55);
replace division = 4 if inlist(statefip, 19, 20, 27, 29, 31, 38, 46);
replace division = 5 if inlist(statefip, 10, 11, 12, 13, 24, 37, 45, 51, 54);
replace division = 6 if inlist(statefip, 1, 21, 28, 47);
replace division = 7 if inlist(statefip, 5, 22, 40, 48);
replace division = 8 if inlist(statefip, 4, 8, 16, 30, 32, 35, 49, 56);
replace division = 9 if inlist(statefip, 2, 6, 15, 41, 53);

tab statefip, missing;
tab division, missing;

save `marcps', replace;


***************************************************;
* Create and Merge all SPM Micro Data             *;
***************************************************;
#delimit cr
clear all

/*
This part of the do file appends together the 2009 through 2015 SPM micro data files, so that they are
ready to be merged to the CPS data.
File created by Elira Kuka on 11/18/2012
Last Updated: 1/17/2012
*/

*DC adds 2012,2013 and 2014, but needs to change name of variables first;

/*KR checked for 2016 SPM measure 2/3/2018: Still not up*/
* Variable names in the spelling the rest of this section expects (H_SEQ, SPMu_Resources, ...).
* The 2012-2015 files store every one of them in all lowercase, so each variable is renamed
* from the lowercase version of its target name.
local spm_newnames H_SEQ SPMu_ID SPMu_Poor SPMu_PovThreshold SPMu_EquivScale SPMu_GeoAdj ///
	SPMu_NumPer SPMu_NumKids SPMu_NumAdults SPMu_TenMortStatus SPMu_Resources SPMu_totval ///
	SPMu_SNAPSub SPMu_CapHouseSub SPMu_SchLunch SPMu_EngVal SPMu_WICval SPMu_FedTax ///
	SPMu_FedTaxBC SPMu_EITC SPMu_ACTC SPMu_FICA SPMu_stTax SPMu_ChildSupPd ///
	SPMu_CapWknChCareXpns SPMu_WkXpns SPMu_ChildCare SPMu_MedOOPnMCareB SPMu_Hage ///
	SPMu_wCohabit SPMu_HHisp SPMu_HMaritalStatus SPMu_HRace SPMu_NewFam SPMu_wNewHead ///
	SPMu_wNewParent SPMu_wUI_LT15 SPMu_Weight

forval i=2012(1)2015 {
	tempfile spm`i'
	* clear is explicit so the loop does not rely on the data in memory being unchanged
	use "`spm_raw'/spmresearch`i'.dta", clear

	foreach spm_new of local spm_newnames {
		local spm_old = lower("`spm_new'")
		rename `spm_old' `spm_new'
	}

	*KR added 4/9/2017: year already defined in 2015 file (for 2016 survey)
	* Blank it out so year is filled in after the append; capture because not every file has it
	cap replace year = .

	save `spm`i''
	}

use "`spm_raw'/spmresearch2009.dta", clear
destring A_LINENO pppos, replace
gen year=2010

append using "`spm_raw'/spmresearch2010.dta"
replace year=2011 if year==.

append using "`spm_raw'/spmresearch2011.dta"
replace year=2012 if year==.

append using `spm2012'
replace year=2013 if year==.

append using `spm2013'
replace year=2014 if year==.

append using `spm2014'
replace year=2015 if year==.

append using `spm2015'
replace year=2016 if year==.

*DC/KR ends;

rename H_SEQ h_seq
rename pppos p_pos
tab year
describe

**********************************************
*** Check how the SPM poor status is created, so we can modify it according to need in the future
*Total SPM Unit resources smaller than SPM unit threshold
gen spmu_poor=SPMu_Resources<SPMu_PovThreshold
assert spmu_poor==SPMu_Poor

*Total SPM Unit resources are:
replace ferp_val=0 if ferp_val==.

gen tot_resources = SPMu_totval + SPMu_SNAPSub + SPMu_CapHouseSub + SPMu_SchLunch + SPMu_EngVal + ///
	SPMu_WICval - SPMu_FedTax + ferp_val - SPMu_FICA - SPMu_stTax - SPMu_ChildSupPd - ///
	SPMu_CapWknCh - SPMu_MedOOPnMCareB

capture noisily assert tot_resources==SPMu_Resources

gen diff = tot_resources - SPMu_Resources
sum diff tot_resources SPMu_Resources
drop diff	 

*Check Total Federal Taxes and how they are calculated/ what they include
gen fed_tot = SPMu_FedTaxBC - SPMu_EITC
capture noisily assert fed_tot==SPMu_FedTax
gen diff = fed_tot - SPMu_FedTax

* Difference!
bysort year: sum diff fed_tot SPMu_FedTax SPMu_FedTaxBC SPMu_EITC
sum diff fed_tot SPMu_FedTax SPMu_FedTaxBC SPMu_EITC, d

*Why? Explore. 
table SPMu_NumPer SPMu_NumKids, c(mean diff)		// no much info
table SPMu_HMarital famhead, c(mean diff)			// no much info

/* What about the refundable portion of the child tax credit? Should be included in FedTax (net fed  
tax). But is it included or not in the FedTaxBC (before credits) variable?
SPM research paper (Short 2012) talks about how much would poverty be without EITC and CC, saying that 
without those credits poverty rate would go from 16.1 to 18.9. I get very close for calendar year 
2011, but not for 2009 and 2010. 
But, for those years documents never mention the additional child credit, so if I instead just subtract
the EITC I get the same results as Short (2011) on the effect of EITC on poverty. */

gen tot_resources2 = SPMu_totval + SPMu_SNAPSub + SPMu_CapHouseSub + SPMu_SchLunch + SPMu_EngVal + ///
	SPMu_WICval - SPMu_FedTaxBC + ferp_val - SPMu_FICA - SPMu_stTax - SPMu_ChildSupPd - ///
	SPMu_CapWknCh - SPMu_MedOOPnM			// ignore tax credits
	
gen tot_resources3 = tot_resources - SPMu_EITC		// subtract just EITC

gen poor_nocred = tot_resources2 < SPMu_PovThreshold
gen poor_noeitc = tot_resources3 < SPMu_PovThreshold

* Poverty rates including and excluding credits
table year [pw=marsupwt], c(mean SPMu_Poor mean poor_nocred mean poor_noeitc) col

/* POSSIBLE CONCLUSION: Divergence between FedTax and (FedTaxBC - EITC) might be due to the fact that 
maybe we should include the additinal child credit in the calculation, ie. it should be
(FedTaxBC - EITC - ACC). I am guessing the Census decided to include ACC in the credits definition 
since 2011, but then maybe updated it backwards so now all years are consistent.
*/


keep h_seq p_pos year SPMu_Poor SPMu_Resources SPMu_PovThreshold SPMu_totval SPMu_SNAPSub ///
	SPMu_CapHouseSub SPMu_SchLunch SPMu_EngVal SPMu_WICval SPMu_FedTax SPMu_FedTaxBC SPMu_EITC ///
	ferp_val SPMu_FICA SPMu_stTax SPMu_ChildSupPd SPMu_CapWknC* SPMu_MedOOP* SPMu_Weight marsupwt
compress
save "`spm_raw'/spmdata", replace


**********************************************
*** Merge with March CPS data
* Reload the March CPS extract and attach the SPM variables person by person.
use `marcps', clear
* One-to-one match on h_seq, p_pos and year. No keep() option is given, so rows that exist
* only in the SPM file (_merge==2) stay in the data with missing CPS variables.
* NOTE(review): confirm that retaining SPM-only rows is intended.
merge 1:1 h_seq p_pos year using "`spm_raw'/spmdata"
* Match quality by year, then discard the merge indicator
tab year _merge
drop _merge

preserve

**********************************************
*** REPLICATE SHORT 2010 (P60-241) REPORT 		
* Need calendar years 2009/10 to compare our poverty rates with poverty rates in Short (2010)
* This code is just to replicate the Short tables, so after we have replicated it we restore the data

keep if year==2010 | year==2011 | year==2012

****** Generate unrelated children under 15 
gen unrel = p_age<15 & f_type==5
gen calyear=year-1
tab calyear unrel, missing

*Transform weight in thousands so we get counts in thousands
replace p_marwt=p_marwt/1000
replace SPMu_Weight=SPMu_Weight/100000
replace marsupwt=marsupwt/100000
sum p_marwt marsupwt SPMu_Weight

capture noisily assert p_marwt== marsupwt
gen temp=p_marwt!= marsupwt
replace temp=. if p_marwt==. | marsupwt==.
tab temp, missing
list p_marwt marsupwt if temp==1 & _n<200
gen diff = p_marwt - marsupwt
sum diff if temp==1

****** Create race and age variables (race code taken from marchcpsfamilyhh.do file)
*** Hispanic
gen hisp = p_ethnicity~=8 & p_ethnicity>0 & year<=2002
*doesn't know or refused to answer*;
	replace hisp=. if (p_ethnicity==9 | p_ethnicity==10) & year<=2002 
	replace hisp = 0 if year>2002
	replace hisp = 1 if year>2002 & p_ethnicity==1
tab hisp, missing
gen hispdkrf = hisp==.
replace hisp=0 if hispdkrf==1
*check
tab hisp, missing

*** White non-Hispanic
gen white = p_race==1 & hisp==0 & hispdkrf==0

*** Black
gen blackhisp = p_race==2 | hisp==1 if year<=2002
replace blackhisp = (hisp>0 | p_race==2 | p_race==6 | p_race==10 | p_race==11 | p_race==12 | ///
	p_race==15 | p_race==16 | p_race==19) if year>2002
tab blackhisp, missing

gen black = p_race==2 & hisp==0 if year<=2002
replace black = (hisp==0 & (p_race==2 | p_race==6 | p_race==10 | p_race==11 | p_race==12 | ///
	p_race==15 | p_race==16 | p_race==19)) if year>2002
tab black, missing

gen other = black==0 & hisp==0 & white==0 & hispdkrf==0

su other black hisp white hispdkrf

*Check population numbers population
gen popul=1
table calyear [pw=p_marwt], c(sum popul sum black sum white sum hisp) col	

* Black definition seems wrong since the total counts do not match the published statistics
gen new_black=p_race==2	
table calyear [pw=p_marwt], c(sum popul sum new_black sum white sum hisp) col	// correct now

***** Generate official poverty
gen officialcpspov = f_famlis==1
tab officialcpspov, missing

*Generate age groups to compare our poverty rates with the ones in Census
* Age bands: 1 = under 18, 2 = 18 to 64, 3 = 65 and over.
* A missing p_age compares as larger than any number, so the top band is capped with p_age<.
* Records without an age (for example rows that came only from the SPM file) keep agegroup missing.
gen agegroup=1 if p_age<18
replace agegroup=2 if p_age>=18 & p_age<65
replace agegroup=3 if p_age>=65 & p_age<.
tab p_age agegroup


*** REPLICATE PARTS OF TABLE 1 IN SHORT (2010)
/* Some small differences in the counts of poor for the SPM measure.*/

*First, official poverty and SPM by age
table calyear agegroup [pw=p_marwt], c(sum popul mean officialcpspov sum officialcpspov) col
table calyear agegroup [pw=p_marwt], c(mean SPMu_Poor sum SPMu_Poor) col
table calyear agegroup [pw=marsupwt], c(mean SPMu_Poor sum SPMu_Poor) col
table calyear agegroup [pw=SPMu_Weight], c(mean SPMu_Poor sum SPMu_Poor) col


*Second, official poverty and SPM by race
table calyear [pw=p_marwt] if white==1, c(mean officialcpspov sum officialcpspov)
table calyear [pw=p_marwt] if white==1, c(mean SPMu_Poor sum SPMu_Poor)

table calyear [pw=p_marwt] if hisp==1, c(mean officialcpspov sum officialcpspov)
table calyear [pw=p_marwt] if hisp==1, c(mean SPMu_Poor sum SPMu_Poor)

table calyear [pw=p_marwt] if new_black==1, c(mean officialcpspov sum officialcpspov)
table calyear [pw=p_marwt] if new_black==1, c(mean SPMu_Poor sum SPMu_Poor)				

restore

save `marcps', replace


***************************************************;
*Create and Merge NAS Micro Data                  *;
***************************************************;
#delimit cr
/*
This part of the do file appends together all Experimental (NAS) Poverty micro data files, from 
calendar years 1997 to 2010. The data was downloaded from the NBER website.

The variables are named inconsistently throughout the years. To fix this, EK first cleans each year
individually, then appends them together. 

EK creates our own msi-ga-ce poverty status variable, with the formulas taken from the SAS code in the 
Census website. Then she makes sure our variables are the same as the ones created by the Census. 

Finally, she keeps in the data the NAS poverty status variables as well as all necessary data needed
to create those variables, so that in the future we can re-create and modify them according to need.

Original files either at the NBER website or at http://www.census.gov/housing/povmeas/. From these 
wesbsites, EK looked at the SAS code to see how the poverty measures are constructed. 
ATTN: Not sure yet whether 1997-1999 poverty measure are consistent with the later years.

File created by Elira Kuka on 11/18/2012
Last updated: 1/17/2012

*/


*********** 2000/2010 ***********

/* The 2000 data file on the Census or NBER websites did not contain the variable feitc, which is 
needed for the family income calculations. EK was in contact with Trudi Renwick, and she provided
the file feitc2000.dta, which contains the feitc variable plus the two merging variables 
(h_seq and pppos). Here I prepare that file so that we can easily merge it to the 2000 data */
use "`nas_raw'/feitc2000.dta", clear
rename pppos p_pos
destring h_seq p_pos, replace
tempfile feitc
save `feitc', replace

// On 3/3/2016, EK adds 14-11 data since we need the variables for the new poverty definitions. But she does not create 
// the thresholds and MSI-GA-CE poverty for those years since we are not using that at all.

foreach i in 14 13 132 12 11 10 09 08 07 06 05 04 03 02 00 {
	use "`nas_raw'/povpu`i'.dta", clear
	tempfile naspov`i'
	if `i'==132 {
		gen calyear=2013
		gen redesigned = 1
	}
	else gen calyear=20`i'
	display ""
	display "THIS IS CALENDAR YEAR " calyear
	display ""
	capture noisily rename ppos p_pos
	capture noisily rename pppos p_pos
	rename ffpos f_pos
	capture noisily rename F_MV_FS f_mv_fs
	capture noisily rename H_SEQ h_seq
	capture noisily rename pwgt p_marwt
	capture noisily rename marsupwt p_marwt
	capture destring h_seq p_pos f_pos poor4, replace
	
		
	*In earlier years, and more importantly in the rest of this code, we use f_mv_sl instead of ///
		fmvfsl and fmvrsl, so we add them together here
	for any fmvfsl fmvrsl: replace X=0 if X==.
	gen f_mv_sl = fmvfsl + fmvrsl
	
	** Notes: Threshold for MSI-GA-CE derived from most recent available 12 quarters of Consumer ///
	Expenditure Survey data
	
	if `i'>10  {
		gen thresh_cega=.
		gen faminc_msi=.
	}
		
	if `i'==10 {
	*Threshold for MSI-GA-CE 
	gen thresh_cega=24267 * scaleb * geo2    	// ce , geo
	
	*Income definitions for MSI-GA-CE
	gen faminc_msi = ftotval - ffica - ffedtax - fsttax + f_mv_fs + f_mv_sl + fengval	///
		- combexpc + fmrcapce - newmoopcap	
	// fmrcapce is capped housing subsidy based on the FMR used in CE thresholds; no houssub	
	}
	
	if `i'==09 {
	*Threshold for MSI-GA-CE 
	gen thresh_cega=24522 * scaleb * geo2		// ce , geo
	*Income definitions for MSI-GA-CE
	gen faminc_msi = ftotval - ffica - ffedtax - fsttax + f_mv_fs + f_mv_sl + fengval ///
		+ ferp_val - combexpc + fmrcapce - newmoopcap	
	}
	
	if `i'==08 {
	*Threshold for MSI-GA-CE
	gen thresh_cega=24755 * scaleb * geo2		// ce , geo
	
	*Income definitions for MSI-GA-CE
	gen faminc_msi = ftotval - ffica - ffedtax + fstimulus - fsttax + f_mv_fs + f_mv_sl ///
		+ fengval - combexpc + fmrcapce - newmoop
	}
	
	if `i'==07 {
	*Threshold for MSI-GA-CE
	gen thresh_cega=23465 * scaleb * geo2		// ce , geo
	*Income definitions for MSI-GA-CE
	gen faminc_msi = ftotval - ffica + fcapgain - fcaploss - ffedtax - fsttax + f_mv_fs + f_mv_sl ///
		+ fengval - combexpc + fmrcapce - newmoopcap
	}
	
	if `i'==06 {
	*Threshold for MSI-GA-CE
	gen thresh_cega=21818 * scaleb * geo2		// ce , geo
	*Income definitions for MSI-GA-CE
	gen faminc_msi = ftotval - ffica + fcapgain - fcaploss - ffedtax - fsttax + f_mv_fs  ///
           + f_mv_sl + fengval - combexpc + fmrcapce - newmoop	
	}
	
	if `i'==05 {
	*Threshold for MSI-GA-CE
	gen thresh_cega=20708 * scaleb * geo2		// ce , geo
	*Income definitions for MSI-GA-CE
	rename ffedtaxa ffedtax
	rename fsttaxa fsttax
	drop ffedtaxb fsttaxb
	gen faminc_msi = ftotval - ffica + fcapgain - fcaploss - ffedtax - fsttax + f_mv_fs + ///
    	f_mv_sl + fengval - combexpc + fmrcapce - newmoop
    }
    	
    if `i'==04 {
	*Threshold for MSI-GA-CE
	gen thresh_cega=19984 * scaleb * geo2		// ce , geo
	*Income definitions for MSI-GA-CE
	rename ffedtaxa ffedtax
	rename fsttaxa fsttax
	drop ffedtaxb fsttaxb
	*No weight in this file! No problem, we can get it later from March CPS
	gen p_marwt=1
	gen faminc_msi = ftotval - ffica + fcapgain - fcaploss - ffedtax - fsttax + f_mv_fs + ///
    	f_mv_sl + fengval - combexpc + fmrcapce - newmoop
    }	
    	
    if `i'==03 {
	*Threshold for MSI-GA-CE
	gen thresh_cega=19778 * scaleb * geo2		// ce , geo
	*Income definitions for MSI-GA-CE
	gen faminc_msi = ftotval - ffica + fcapgain - fcaploss - ffedtax - fsttax + feitc + f_mv_fs + ///
    	f_mv_sl + fengval - combexpc + fmrcapce - newmoop
    }
    	
    if `i'==02 {
	*Threshold for MSI-GA-CE
	gen thresh_cega=19329 * scaleb * geo2		// ce , geo
	*Income definitions for MSI-GA-CE
	gen faminc_msi = ftotval - ffica + fcapgain - fcaploss - ffedtax - fsttax + feitc + f_mv_fs + ///
    	f_mv_sl + fengval - combexpc + fmrcapce - newmoop
    }
    
    if `i'==00	 {
    * First merge the feitc file.
    merge 1:1 h_seq p_pos using `feitc'
    drop _merge

    *Poverty status variable names are different for 2000, so here I change them accordingly
    drop poor4
    rename poor8 poor4

	*Threshold for MSI-GA-CE. 
	*Need to use threshold provided because the SAS code does not provide instructions on how to
	gen thresh_cega=nl3
	*Income definitions for MSI-GA-CE
	gen faminc_msi = ftotval - ffica  + fcapgain - fcaploss - ffedtax - fsttax + feitc + f_mv_fs  ///
        + f_mv_sl + fengval - combexpc + fmrcapce - newmoop	
	}
	
	*Generate poverty indicator	
	gen msigace=faminc_msi<thresh_cega
	replace msigace=. if thresh_cega==.
	*replace msigace=. if faminc_msi==.
	
	*Make sure that the variable EK created is the same as the one in the Census
	capture noisily assert msigace==poor4
	gen temp=msigace-poor4
	tab temp, missing
	
	gen diff=thresh_cega-faminc_msi if temp!=0 | temp==.
	sum diff h_seq thresh_cega faminc_msi msigace poor4 if temp!=0 | temp==.
	if `i'==04 list poor4 msigace faminc_msi thresh_cega scaleb geo2 ftotval ffica fcapgain ///
    	fcaploss ffedtax fsttax f_mv_fs f_mv_sl fengval combexpc fmrcapce newmoop if temp!=0 
	
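	*These are the variables that are needed for the creation of poverty status
	*There are a few differences across years
	*NOTE(review): redesigned (generated above only for file 132) is not in any keep list below, so it is dropped before the save - confirm this is intended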
	local keepings h_seq p_pos f_pos poor4 scaleb geo2 ftotval ffica ffedtax fsttax ///
		f_mv_fs f_mv_sl fengval combexpc fmrcapce thresh_cega faminc_msi msigace ///
		calyear p_marwt
	
	if `i'>=10 keep `keepings' newmoo*
	if `i'==09 keep `keepings' newmoo* ferp_val
	if `i'==08 keep `keepings' fstimulus newmoo*
	if `i'==07 keep `keepings' fcapgain fcaploss newmoo*
	if `i'==04 | `i'==05 | `i'==06 keep `keepings' fcapgain fcaploss newmoo*
	if `i'==00 | `i'==02 | `i'==03 keep `keepings' feitc fcapgain fcaploss newmoo*
	describe
	save `naspov`i'', replace
}


*********** 2001 ***********
* Calendar year 2001 NAS file. DATA ISSUE: this file carries no Census poverty
* variable, so EK builds the MSI-GA-CE indicator here but cannot verify it.
tempfile naspov01
use "`nas_raw'/povpu01.dta", clear
generate calyear = 2001

* Bring identifiers, food stamp value and weight in line with the other years
rename pppos p_pos
rename ffpos f_pos
rename H_SEQ h_seq
rename F_MV_FS f_mv_fs
rename marsupwt p_marwt

* The rest of the code works with a single f_mv_sl, so the two components that
* this file provides (fmvfsl and fmvrsl) are added together
generate f_mv_sl = fmvfsl + fmvrsl

* Threshold for MSI-GA-CE (ce, geo)
generate thresh_cega = 18709 * scaleb * geo2

* Income definition for MSI-GA-CE
local msi_income ftotval - ffica + fcapgain - fcaploss - ffedtax - fsttax + feitc + f_mv_fs + f_mv_sl + fengval - combexpc + fmrcapce - newmoop
generate faminc_msi = `msi_income'

* Poverty indicator (msi, ce), left missing where the threshold is missing
generate msigace = faminc_msi < thresh_cega if thresh_cega != .

summarize msigace faminc_msi thresh_cega

capture destring h_seq p_pos f_pos, replace

* Variables carried forward into the stacked NAS file
local keep01 h_seq p_pos f_pos scaleb geo2 ftotval ffica fcapgain fcaploss ffedtax fsttax feitc f_mv_fs
local keep01 `keep01' f_mv_sl fengval combexpc fmrcapce newmoo* msigace faminc_msi thresh_cega calyear p_marwt
keep `keep01'
describe
save `naspov01', replace



*********** 1997/1999 ***********
* Calendar years 1997-1999. EK judged npoor2 to be the Census flag closest to the
* msi-ce-ga concept that the 2000-2010 files call poor4.

foreach i in 99 98 97 {
	tempfile naspov`i'
	use "`nas_raw'/povpu`i'.dta", clear
	generate calyear=19`i'
	display ""
	display "THIS IS CALENDAR YEAR " calyear
	display ""

	* Harmonise variable names with the later files. Renames under capture are
	* allowed to fail when the source name is absent from a given file.
	* CHECK: Pay ATTN, person id variable may be wrong
	capture noisily rename ppposold p_pos
	rename ffpos f_pos
	capture noisily rename H_SEQ h_seq
	capture noisily rename POOR1 poor1
	capture noisily rename GEO1 geo1
	rename F_MV_FS f_mv_fs
	rename F_MV_SL f_mv_sl
	rename marsupwt p_marwt
	rename combexp combexpc

	destring h_seq p_pos f_pos, replace

	* Income definitions in the downloaded Census SAS code, kept for reference:
	*   def001 = ftotval                     (official income definition)
	*   def013 = def001 - ffica + fcapgain - fcaploss - ffedtax - fsttax + feitc
	*            + f_mv_fs + housub + f_mv_sl + fengval - moop
	*   def017 = def013 - combexpc           (NAS Panel definition)

	* Income definition for MSI-GA-CE
	local msi_income ftotval - ffica + fcapgain - fcaploss - ffedtax - fsttax + feitc + f_mv_fs + housub + f_mv_sl + fengval - combexpc - moop
	generate faminc_msi = `msi_income'

	* Year-specific pieces: name of the Census poverty flag and base threshold.
	* In 1999 nl1-nl12 (the Consumer Expenditure Survey thresholds) are missing and
	* only nl13-nl24 are available, which are the 1997 CE thresholds updated to 1999
	* with the CPI, hence the cpi back-dated base value for that year.
	if `i'==99 {
		local census_flag NPOOR14
		local base_thresh 16606
	}
	else if `i'==98 {
		local census_flag NPOOR2
		local base_thresh 16401
	}
	else {
		local census_flag NPOOR2
		local base_thresh 15998
	}
	rename `census_flag' poor4

	* Threshold for MSI-GA-CE (ce, geo)
	generate thresh_cega = `base_thresh' * scalepm * geo1

	* Poverty indicator, left missing where the threshold is missing
	generate msigace = faminc_msi < thresh_cega if thresh_cega != .

	* Compare the EK indicator with the Census one
	capture noisily assert msigace==poor4
	generate temp = msigace-poor4
	tabulate temp, missing			// Missing due to unrelated children
	summarize h_seq msigace poor4 faminc_msi thresh_cega if temp==.

	* Variables needed for the creation of poverty status
	local keepvars h_seq p_pos f_pos poor4 scalepm geo1 ftotval ffica ffedtax fsttax feitc fcapgain fcaploss
	local keepvars `keepvars' housub fengval combexpc moop thresh_cega faminc_msi msigace f_mv_fs f_mv_sl calyear p_marwt
	keep `keepvars'
	describe
	save `naspov`i'', replace
}


*********** APPEND TOGETHER ***********
* Start from the calendar year 2010 file and stack every other NAS year under it
use `naspov10', clear

local other_years 97 98 99 00 01 02 03 04 05 06 07 08 09 11 12 13 132 14
foreach yr of local other_years {
	di "CALENDAR YEAR is `yr'"
	append using `naspov`yr''
}
generate year = calyear+1		// Year variable represents March CPS year
bysort year: summarize combexpc newmoo*

* Label variables
label variable thresh_cega "Poverty threshold, with CE and GA criteria"
label variable faminc_msi "Total family income, MSI def"

* Tabulate missingness by year for each income component, then set missing values
* to zero so that the components can later be added up without dropping rows.
* Values are missing in years when a component is not used, or for unrelated children.
local zero_fill ftotval ffica ffedtax fsttax f_mv_fs f_mv_sl fengval combexpc fmrcapce newmoopcap
local zero_fill `zero_fill' ferp_val fstimulus newmoop moop housub fcapgain fcaploss feitc
foreach v of local zero_fill {
	generate empty_`v' = `v'==.
	tabulate year empty_`v', missing
	replace `v' = 0 if `v'==.
}
drop empty_*

* In the 1997-1999 NAS files p_pos runs from 1 to 16, while in the March CPS it runs
* from 41 to 56. Adding 40 aligns the two. EK reports that no record merged for
* those years before this adjustment and that all of them merge after it.
replace p_pos = p_pos+40 if year<2001

* Check observations
tabulate year, missing

* These variables exist in both datasets, so the NAS copies get their own names
rename f_mv_sl fmvsl_nas
rename f_mv_fs fmvfs_nas
rename ftotval ftotval_nas
rename p_marwt weight_nas

save "`nas_raw'/nasdata.dta", replace



*********** MERGE TO MARCH CPS DATA ***********		
* Attach the stacked NAS poverty file saved above to the person-level March CPS data,
* then (inside preserve/restore) compare the resulting poverty rates with the
* public-use rates published by the Census.
use `marcps', clear	

/*The 2001 CPS data that we had initially was the old, smaller march sample with only 130K 
observations, while the 2000 NAS data contains the SCHIP extended sample with around 220K 
observations. So now we use the SCHIP extended march sample too. */
tab year if year==2001, missing	

*bysort year: sum p_pos				// To check that p_pos is always between 41 and 56 in March CPS
 
* Person-level match on household sequence, person position and CPS year.
* merge 1:1 stops with an error if the key is not unique on either side.
merge 1:1 h_seq p_pos year using "`nas_raw'/nasdata.dta"
tab year _merge, missing
tab p_age year if _merge==1 & year>1997, missing		// All nonmerges are under age 15 or from year 2001
tab p_age year if _merge==2 & year>1997, missing		// All nonmerges are from years 2001


* Everything from here to restore is diagnostic only: the drops and replaces made
* in between are discarded when the data are restored.
preserve

*********************************
*********** REPLICATE PUBLISHED POVERTY RATES ***********

*** Generate all unrelated children under 15, and drop them
gen unrel = p_age<15 & (f_type==5)
tab year unrel, missing
drop if unrel==1

*** Make sure I dropped correct observations
tab p_age year if _merge==1 & year>1997, missing		
tab p_age year if _merge==2 & year>1997, missing

*** Make sure weights are the same (they should differ only for CPS year 2005)
* Weights in 1998-2000 are in different units
* In the for commands below X is replaced in turn by each listed year
for any 1998 1999 2000: sum p_marwt weight_nas if year==X
replace weight_nas=weight_nas*1000 if year<2001 & year>1997
for any 1998 1999 2000: sum p_marwt weight_nas if year==X

capture noisily assert p_marwt==weight_nas if year!=2005 & year>1997

/*Explore why the difference in weight. This difference arises because of the non-merges in the 2001
data. */
gen diff_weight=0 if p_marwt==weight_nas 
replace diff_weight=1 if p_marwt!=weight_nas
tab year diff_weight if year>1997, missing

* Weird. It looks like weights are exactly the same despite the diff_weight variable being 1
* Ignore this now, but maybe HH or MB have an idea of why this is happening?
* NOTE(review): possibly a storage-precision artefact, since the two weights may be held
* in different storage types and weight_nas is rescaled by 1000 above. TODO confirm with
* describe and by comparing float(p_marwt) with float(weight_nas).
list h_seq year p_marwt weight_nas if diff_weight==1 & h_seq<1000 & year>1997 & year!=2005 

tab diff_weight _merge if year>1997, missing
tab diff_weight _merge if year!=2001 & year>1997, missing

* Make sure that the variable EK created is the same as the one in the Census
capture noisily assert msigace==poor4
gen temp=msigace-poor4
tab year temp, missing

replace p_marwt=p_marwt/1000	// Transform in thousands

* Generate age groups to compare our poverty rates with the ones in Census
gen agegroup=1 if p_age<18
replace agegroup=2 if p_age>=18 & p_age<65
replace agegroup=3 if p_age>=65
*tab p_age agegroup			// To check that the groups are correct


* Create official poverty rate (code as in other-figs-tables.do file)
gen officialcpspov = f_famlis==1
tab officialcpspov, missing


***************
/** ATTN: Initially, we were trying to replicate the following:
Table B.3: Official and National Academy of Sciences (NAS) Based Poverty Rates: 1999 to 2011
But after we talked to Trudi Renwick, we realized that these poverty rates are calculated with
internal files only, while our data is from public use files. 
The Census provides poverty rates calculated with the public files, which we can replicate. 
These rates are found in the following link: http://www.census.gov/housing/povmeas/pov10/povpu10.lst
(this link is for 2010 only). To get poverty rates for other years just modify `10' into the year of
interest. As an example, the file whose link I provided above has the following poverty rate for 2010:

msi-CE

                                  Cumulative    Cumulative
poor4    Frequency     Percent     Frequency      Percent
----------------------------------------------------------
0        258205.4       84.47      258205.4        84.47
1        47482.25       15.53      305687.7       100.00


Here are the public file poverty rates for MSI-GA_CE for each year, as collected by EK from the
above links.

1997	1998	1999	2000	2001	2002	2003	2004	2005	2006	2007	2008	2009	2010	
15.4	14.6	13.8	12.3	13.09	13.33	13.79	12.43	12.66	12.78	15.27	15.77	15.72	15.53

Note: 2004-2006 are unweighted poverty rates.
*/

*** REPLICATION
*In the published poverty results, some numbers are weighted and some not		
* First table: weighted means for calendar years before 2004 and after 2006
table calyear [pw=p_marwt] if calyear<2004 | calyear>2006, ///
	c(mean officialcpspov mean msigace mean poor4) col 		
* Second table: unweighted means.
* NOTE(review): the condition covers calendar years 2004-2007, although the note above
* says that only 2004-2006 are unweighted - confirm whether 2007 is included on purpose.
table calyear if calyear>2003 & calyear<2008, c(mean officialcpspov mean msigace mean poor4) col 

restore
* Back on the full merged data: the merge marker and the NAS weight are not needed further
drop _merge weight_nas


***************************************************;
*Generating household variables                   *;
***************************************************;
#delimit ;
* Purpose: build head, the household head flag used by the later sections, and noheadhh,;
* which marks households that had no head after the first-pass rule below.;
* head ends up 1 for the head and 0 for everybody else in a household that has a head.;
* jshead (JS rule: first listed person) is built only for comparison and is dropped at the end.;
* Reminder: with the semicolon delimiter every star comment must end with a semicolon,;
* otherwise it swallows the next statement.;
*=Identified as HH Head*;
gen head = 1 if (p_relhd==1 | p_relhd==2); 
*One person households*;
* The canned CPS person count is spelled out in full here. The short form h_numpers only;
* resolved to h_numpers_cps through variable abbreviation (h_numpers itself is created;
* further below) and would break under set varabbrev off.;
replace head = 1 if h_numpers_cps==1; 

*** MB changes to head concept;
**** Note using count works here because head is . if not 1 ******;
* fill in 0s below *;
bysort hhid: egen headcount = count(head);
tab headcount;
*** check that MB definition of head is more sensible than JS *;
*** 529 observations, all 89-93;
* Mark these households for checking characteristics of JS/MB head below *;
gen noheadhh = headcount==0;

*** says they are all partner or relative of householder, and all are secondary individual families;
tab year f_type if headcount==0;
tab year p_relhd if headcount==0;

*** JS picks first one ****;
*** Can lead to infants being heads ****;
*In HH's full of unrelated individuals, pick the first one as head*;
bysort hhid: gen h1 = 1 if _n==1; 
gen jshead = head;
replace jshead = 1 if h1==1 & headcount==0;
* headcount is rebuilt from head (not jshead), so it still shows the headless households;
drop headcount;
bysort hhid: egen headcount = count(head);
*Now hope that each family has one head*;
tab headcount; 
* For 77-88,MB assigns to person with h_ppind=p_pos, p_princ=1, and p_age >=15 *;
* here at least pick p_age >=15 *;
* Does not do every HH, are still some HH with noone, pick first adult 15 and up*;
* NEW with weight = HH weight*;
* code first adult 15 and older with right weight *;
* t15plus holds p_pos for persons 15 and older whose person weight equals the household weight;
gen t15plus =p_pos if p_age>=15 & h_weight==p_marwt;
bysort hhid: egen minp_posif15 = min(t15plus);
replace head =1 if minp_posif15 == p_pos & headcount==0 & p_age>=15;
* Count again *;
drop headcount;
** tab count of MB measure *;
**** Note using count works here because head is . if not 1 ******;
* fill in 0s below *;
bysort hhid: egen headcount = count(head);
*Now each HH has one head*;
tab headcount; 
** tab count of JS measure *;
bysort hhid: egen jsheadcount = count(jshead);
*Now each JS HH has one head*;
tab jsheadcount; 

*** look at characteristics of JS and MB heads;
tab year if noheadhh==1;

tab year if noheadhh==1 & jshead!=head;
tab year if noheadhh==1 & jshead==head;
su p_age p_sex if jshead==1 & jshead!=head;
su p_age p_sex if head==1 & jshead!=head;


*pause;
*** noone has no head now;
su if headcount==0;

**** for debugging;
***pause on;
** pause;

*** fill in head variable as 0;
* Households that still have no head keep head missing rather than 0;
replace head = 0 if head ==. & headcount!=0;
label variable noheadhh "Household without a head 15 and older";

drop headcount h1 jsheadcount;
drop jshead t15plus minp_posif15 ;
*** MB end;


* Household-level counts: children under 18, persons, and elderly (65 and older);
generate under18 = p_age<18;
generate elderly = p_age>=65;
generate one = 1;
bysort hhid: egen h_kidu18 = sum(under18);
bysort hhid: egen h_numpers = sum(one);
bysort hhid: egen h_elderly = sum(elderly);

* Older files name the child support/other private income variable p_inc_cs. Each of the;
* three statements below is allowed to fail silently when it does not apply.;
capture rename p_inc_cs p_inc_othpr;
*For survey year 2016:;
capture generate p_inc_othpr = 0;
capture replace p_inc_othpr = p_inc_csp if year >= 2016;

* For every person income component: tabulate missingness by year, then total it by household;
local inc_parts ws se fr vet asset othpr sp ss pa dis ret tot earn oth uc wc csp alm;
foreach part of local inc_parts {;
	generate `part'_miss = p_inc_`part'==.;
	tabulate year `part'_miss, missing;
	drop `part'_miss;
	bysort hhid: egen h_inc_`part' = sum(p_inc_`part');
};

*** MB;
**** our person count should equal the canned CPS person count;
capture noisily assert h_numpers==h_numpers_cps;

**** our household income total against the canned household total;
tabulate year if h_inc_tot == h_hhinctot;
tabulate year if h_inc_tot != h_hhinctot;

*** non matching small number differences are TINY 1$;
generate diff = h_inc_tot-h_hhinctot;
summarize diff if diff~=0, detail;
list hhid year h_kidu18 h_numpers h_elderly p_age p_sex h_inc_tot h_hhinctot noheadhh diff
	if h_inc_tot!=h_hhinctot;
drop diff;

**** drop canned CPS number of people value which always matches, leave canned h_hhinctot;
drop h_numpers_cps;

tabulate head;

*** MB end;



***************************************************;
*Generating small family variables                *;
***************************************************;
* A small family is one f_pos within a household sequence number and survey year.;
* This section builds its id (sfamid), its head flag (sfhead), its person counts (sf_*);
* and its income totals (sf_inc_*).;

egen sfamid = group(year h_seq f_pos);
*** non family HHer or secondary individual;
*** this will include foster kids;
*** p_relhd==11;
gen sfhead = 1 if p_famtyp==2 | p_famtyp==5 | p_famrel==1;
**** Note using count works here because sfhead is . if not 1 ******;
* fill in 0s below *;
bysort sfamid: egen sfheadcount=count(sfhead);
*Check that each small family has exactly one head*;
tab sfheadcount;


* under18, elderly and one are the person-level indicators created in the household section;
bysort sfamid: egen sf_kidu18 = sum(under18);
bysort sfamid: egen sf_numpers = sum(one);
bysort sfamid: egen sf_elderly = sum(elderly);

*** see if number of persons in the subfamily matches the total number of persons reported for the family;
*** suspicion is it does not, as for 80-88,89-12 we think that primary family gets number of persons and income;
*** from related subfamilies, but related subfamilies get their own family income;
*EK switched from tab f_type year to tab year f_type, because previously there was an error message
that there were too many values for tab;
tab year f_type if sf_numpers==f_numpers;
*** all the ones that don't match are primary families;
*EK switched from tab f_type year to tab year f_type, because previously there was an error message
that there were too many values for tab;
tab year f_type if sf_numpers!=f_numpers;

*** check that indeed number of persons for CPS canned family measure is big family concept;
* t flags persons with f_type 1 or 3 and countbigf counts them within the household;
gen t = 1 if f_type==1 | f_type==3;
egen countbigf = sum(t), by(hhid);
* NOTE(review): countbig below appears to rely on variable abbreviation for countbigf - confirm varabbrev is on;
su countbig f_numpers if sf_numpers!=f_numpers;
su countbig f_numpers if sf_numpers!=f_numpers & countbig==f_numpers;
su countbig f_numpers if sf_numpers!=f_numpers & countbig!=f_numpers;
drop t;


*** fill in sfhead;
replace sfhead=0 if sfhead==.;

drop sfheadcount;

*EK added sur ed fin oi to this list, since they are considered part of total income;
** EK adds a few variables here when she adds Taxsim calculations;
* The rename is allowed to fail silently when sf_inc_cs does not exist;
cap rename sf_inc_cs sf_inc_othpr;
* Total each person income component within the small family;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth uc wc csp alm sur ed fin oi val rnt div { ;
bysort sfamid: egen sf_inc_`n' = sum(p_inc_`n');
};

*****************************************************;
*Generating big family variables                    *;
* pool all people related to the head with the head *;
*****************************************************;
* This section builds the big family id (bfamid), its head flag (bfhead), its person;
* counts (bf_*) and income totals (bf_inc_*), and compares them with the canned CPS;
* family values and with the small family values.;

*DC changes year>=1989 to year>=1988;

*** People related to head;
* All persons with f_type 1 or 3 share the constant 100 so that they fall in one group;
gen relatedfamily = 100 if (f_type==1 | f_type==3) & year>=1988;
*** family if unrelated to head;
gen unrelatedfamily = f_pos if f_type==4 & year>=1988;
** primary individual no family;
gen singlehher = f_pos if f_type==2 & year>=1988;
gen unrelindiv = f_pos if f_type==5 & year>=1988;

*DC ends;

* NOTE(review): related and unrelated in the next three statements appear to rely on variable;
* abbreviation for relatedfamily and unrelatedfamily - confirm varabbrev is on;
for any related unrelated singlehher unrelindiv: replace X =0 if X==.;

corr related unrelated singlehher unrelindiv;

** check categories are mutually exclusive;
gen tmper = related + unrelated + singlehher + unrelindiv;
tab year tmper ;
** tmper should be f_pos unless in big family, then is 100;
* NOTE(review): the four components are only filled for year>=1988, so these asserts would stop;
* the run if earlier survey years are in memory - confirm that the sample starts in 1988;
assert tmper==f_pos if f_type!=1 & f_type!=3;
assert tmper==100 if f_type==1 | f_type==3;

tab tmper f_type;
drop tmper;

*DC changes year>=1989 to year>=1988;

*** Big family;
*** puts all related people together;
*** unrelated stay in their families;
egen bfamid = group(year h_seq relatedfamily unrelatedfamily singlehher unrelindiv);
** Unrelated individual, non-family householder, or unrelated subfamily;
*** stay with existing head;
gen bfhead = 1 if ((p_famrel==1 & f_type!=1 & f_type!=3) | f_type==2 | f_type==5) & year>=1988;
*** For big family (primary family and related subfamilies), head is primary family head ;
replace bfhead = 1 if p_relhd==1 & (f_type==1 | f_type==3) & year>=1988;
**** Note using count works here because head is . if not 1 ******;
* fill in 0s below *;
bysort bfamid: egen bfheadcount=count(bfhead);
*Check that each big family has exactly one head*;
tab bfheadcount; 

*DC ends;

*** fill in bfhead;
replace bfhead=0 if bfhead==.;
drop bfheadcount;


* under18, elderly and one are the person-level indicators created in the household section;
bysort bfamid: egen bf_kidu18 = sum(under18);
bysort bfamid: egen bf_numpers = sum(one);
bysort bfamid: egen bf_elderly = sum(elderly);

*** see if number of persons in the subfamily matches the total number of persons reported for the family;
*** matches for all but related subfamilies and 5 primary families;
*EK switched from tab f_type year to tab year f_type, because previously there was an error message
that there were too many values for tab;
tab year f_type if bf_numpers==f_numpers;
*** does not match for related subfamilies, consistent with above;
*EK switched from tab f_type year to tab year f_type, because previously there was an error message
that there were too many values for tab;
tab year f_type if bf_numpers!=f_numpers;
*** see that sf_numpers matches for these households if subfams;
tab f_type if sf_numpers==f_numpers & bf_numpers!=f_numpers;
tab f_type if sf_numpers!=f_numpers & bf_numpers!=f_numpers;

*EK added sur ed fin oi to this list, since they are considered part of total income;
* Total each person income component within the big family;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth uc wc csp alm sur ed fin oi { ;
bysort bfamid: egen bf_inc_`n' = sum(p_inc_`n');
};

*** see that family income matches bfincome/sfincome for the same groups;
*** matches for primary individual, lots of primary families, unrel subfams, secondary, and some related subfams;
*** weird that some related subfamilies are here too;
*** these are ones where bf_inc_tot==sf_inc_tot (no income for bigfamily);
*EK switched from tab f_type year to tab year f_type, because previously there was an error message
that there were too many values for tab;
tab year f_type if bf_inc_tot==f_inc_tot;
** related subfamilies where big fam income=CPS fam inc are ones where primary family has 0 income;
su sf_inc_tot bf_inc_tot f_inc_tot if bf_inc_tot==f_inc_tot & f_type==3;
*** small number of primary families don't match, ignore;
*** large number of subfamilies don't match;
*EK switched from tab f_type year to tab year f_type, because previously there was an error message
that there were too many values for tab;
tab year f_type if bf_inc_tot!=f_inc_tot;
*** check that most of these are ones where sf_inc_tot==f_inc_tot;
*** vast bulk are related subfamilies where sf_inc_tot==f_inc_tot;
su sf_inc_tot f_inc_tot bf_inc_tot f_type year if bf_inc_tot!=f_inc_tot & sf_inc_tot==f_inc_tot;
*** small number of obs where it doesn't match for both some related and unrelated;
su sf_inc_tot f_inc_tot bf_inc_tot f_type year if bf_inc_tot!=f_inc_tot & sf_inc_tot!=f_inc_tot;

* The four grouping helpers are only needed to build bfamid;
drop relatedfamily unrelatedfamily singlehher unrelindiv; 

*** for debugging;
*** pause on;
*** pause;



** EK uncomments this;
** MB comment out for now;
******************************************************;
*Generating CPS family variables for poverty purposes*;
* to compare to P60 published numbers                *;
******************************************************;
*** we think that for 89 for official numbers;
*** primary families and related subfamilies use big family concept;
**** NOTE that reported family income, family threshold, family number of people;
**** do NOT follow this convention, but are consistent with pre-89 rules;

*** A consistent family ID cannot be made for the family income concept that the CPS reports,;
*** because the big families need the related subfamily income;
*** while the small related subfamilies need their own income only;
*** A head can still be created, to get characteristics from;
*** if the canned CPS family income is to be used;
*** CAUTION THIS MEANS FAMILY INCOME FOR CPS FAMILIES IS NOT INCOME FOR OFFICIAL POVERTY;

*DC changes year>=1989 to year>=1988;

**** No summing here, unlike for households and big and small families, because;
**** cps families are little family for related subs (f_type 3), but big family otherwise;
*** Every cf_ variable is therefore copied from the relevant concept, for year>=1988 only;
generate cfhead = cond(f_type==3, sfhead, bfhead) if year>=1988;

foreach stat in kidu18 numpers elderly {;
	generate cf_`stat' = cond(f_type==3, sf_`stat', bf_`stat') if year>=1988;
};

local inc_parts ws se fr vet asset othpr sp ss pa dis ret tot earn oth uc wc csp alm sur ed fin oi;
foreach src of local inc_parts {;
	generate cf_inc_`src' = cond(f_type==3, sf_inc_`src', bf_inc_`src') if year>=1988;
};

*DC ends;

**** check that our CPS family matches CPS canned measure;
**** mismatches rare 5 obs;
summarize cf_numpers f_numpers year if cf_numpers!=f_numpers;

*** check family income here cf matches CPS;
*** it does for most;
*** 177 doesn't, vast buil (90%) are 1l rest are positive;
tabulate year if cf_inc_tot==f_inc_tot;
tabulate year if cf_inc_tot!=f_inc_tot;
generate diff = cf_inc_tot-f_inc_tot;
summarize diff if cf_inc_tot-f_inc_tot;
drop diff;
*** for debugging;
*** pause on;
*** pause;
* end of comment out;

** drop canned CPS values for people since they always matches our value, leave f_inc_tot since it 
doesn't match for a few people;
drop f_numpers;



/*;
** comment out for now;
** To do fix later;
***************************************************;
*Changing topcoded values for income and wages    *;
***************************************************;
#delimit ;

* this is wrong for now, as top value is midpoint of range for some years;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth uc wc csp alm sur ed fin oi { ;
	qui gen p_inc_`n'_adj = p_inc_`n';
	foreach x in 1989 1990 1991 1992 1993 1994 1995 {;
		qui sum p_inc_`n' if year==`x';
		qui replace p_inc_`n'_adj = p_inc_`n'*1.45 if (p_inc_`n'==r(max) & year==`x');
	};
};

***************************************************;
*Generating adjusted household income variables   *;
***************************************************;
foreach n in ws se fr vet asset othpr sp ss pa dis ret tot earn oth uc wc csp alm sur ed fin oi { ;
bysort hhid: egen h_inc_`n'_adj = sum(p_inc_`n'_adj);
};

** end of comment out, also need to add adjusted small, big, and census family numbers;
*/;


** EK modifies code to add TAXSIM variables 05/26/15;

**********************************************************;
*Calculate children, marital status, income at small family level  *;
**********************************************************;	
*kr changed var names and added new ones 2/6/2018 for new taxsim syntax;		
* This section moves unrelated children under 15 into the small family of the primary;
* family or primary individual of their household, then counts dependents per small;
* family (dep13, dep17, dep18, depx) in the form that TAXSIM expects.;

* Diagnostic only: count of non-head members aged 17 or less per small family before the;
* reallocation of unrelated children, cross-tabulated against sf_kidu18 and then dropped.;
* kid_24fts is named in full, the short form kid only worked through variable abbreviation.;
gen kid_24fts = p_age<=17 & sfhead==0 & p_age!=.;
egen dep17 = sum(kid_24fts), by(sfamid);
tab dep17 sf_kidu18, m;
drop kid_24fts dep17;

* We want to assign unrelated children as children of the head of family in primary family;
gen unrel = p_age<15 & f_type==5;		// create variable;
tab unrel sfhead, missing;				// all unrelated children are considered heads of small family;

* Allocate unrelated children to primary family/primary individual;
tab head f_type, missing;				// heads are in primary families or primary individuals;
gen prim_ind = f_type==2;				// make sure no overlap of primary families and individuals;
egen primind_h = max(prim_ind), by(hhid);
tab f_type primind_h, missing;

* 1/3 of (unweighted) unrelated kids is going to primary individuals;
tab unrel primind_h, missing;			

gen tmpsfamid = sfamid if f_type==1 | f_type==2;
egen tmpsfamid2 = max(tmpsfamid), by (hhid);				// Assign the small family id to each person in the household;
replace sfamid = tmpsfamid2 if unrel==1 & tmpsfamid2!=.;	// Replace sfamid of unrel kids with that of the primary family;

replace sfhead = 0 if unrel==1;				// Unrelated children no longer small family heads;
tab unrel sfhead, missing;						// Now none of them is sfhead, so can count towards kids;
drop tmpsfami*;

*Various credits whether child is 13, 17, 18 or less;
foreach age in 13 17 18 {;
*** Non-head member of the small family whose age is at most the current cutoff;
gen kid_`age'fts = p_age<=`age' & sfhead==0 & p_age!=.;
egen dep`age' = sum(kid_`age'fts), by(sfamid);
tab dep`age' sf_kidu18, m;
};
	
*** Number of total dependents;
* Everybody in the small family except one person;
gen num=1;
egen depx = sum(num), by(sfamid);	
replace depx = depx-1;
drop num;
tab depx dep17, m;
tab depx dep18, m;
* Cap all dependent counts at 15;
for any depx dep18 dep17 dep13: replace X=15 if X>15 & X!=.;

*** Exemptions for old age;
**KR 2/6/2018: this changed in new taxsim syntax. Comment out old stuff;
* The comment line above must end with a semicolon. Without it the comment runs on,;
* through the commented-out block, up to the first semicolon after it, which silently;
* removes the egen page statement so that page is never created here.;
/*
gen temphead=0;
replace temphead=1 if sfhead==1 & p_age>=65 & p_age!=.;
bysort sfamid: egen oldhead=total(temphead);
tab oldhead, m;

gen tempspouse=0;
replace tempspouse=1 if sfhead==0 & p_relhd==3 & p_age>=65 & p_age!=.;
bysort sfamid: egen oldspouse=total(tempspouse);
tab oldspouse, m;

gen agex=oldspouse+oldhead;
tab p_marst agex, m;
*/
* page is the age of the small family head, spread to every member of the small family;
egen page = max(p_age*(sfhead == 1)), by(sfamid);
* sage is the age of the non-head member with p_relhd 3, and 0 when there is none;
egen sage = max(p_age*(sfhead == 0)*(p_relhd == 3)), by(sfamid); //0 if unmarried


******;
* Create datafile to run through TAXSIM and then merge it to main data;
******;
* Between preserve and restore the data are cut down to one row per small family head,;
* recoded into taxsim27 input variables, run through taxsim27, and saved in a tempfile.;
* After restore the tempfile is merged back onto every person by sfamid and year.;
preserve;

*** Keep one observation for each small family, ie. the small family head;
keep if sfhead==1;
	cap g page = p_age if sfhead == 1; //fallback: creates page here when it is not already in memory, capture makes it a no-op otherwise;
tab year, m;

keep year statefip p_marst p_marwt p_age sfamid sfhead f_kind sf_kidu18 dep13 
	dep17 dep18 depx page sage sf_inc_ws 
	sf_inc_se sf_inc_fr sf_inc_val sf_inc_rnt sf_inc_alm sf_inc_div	sf_inc_ret sf_inc_sp sf_inc_ss 
	sf_inc_dis sf_inc_uc;	

for any sf_inc_ws sf_inc_se sf_inc_fr sf_inc_val sf_inc_rnt sf_inc_alm sf_inc_div sf_inc_ret
	sf_inc_sp sf_inc_ss sf_inc_dis sf_inc_uc: replace X=0 if X==.;
	
* Marital status;
* mstat starts at 1, becomes 2 when p_marst is below 3, and 3 when still 1 with at least one dependent;
* NOTE(review): presumably 1 single, 2 joint, 3 head of household in TAXSIM terms - confirm against the TAXSIM documentation;
tab p_marst, m;
gen mstat=1;
replace mstat=2 if p_marst<3;
replace mstat=3 if mstat==1 & depx>0;

** Fix income variables;
gen pwages=sf_inc_ws + sf_inc_se + sf_inc_fr;  			// personal wages (include self-employment);
replace pwages=0 if pwages<0;									// taxsim will not take negatives;
gen dividends=sf_inc_div; // dividends;
gen intrec = 	sf_inc_val;				  //KR added: interest now broken out;
gen otherprop= sf_inc_rnt ;
gen nonprop =  sf_inc_alm; 	//KR broke out bc of Medicaid NIIT;
gen pensions=sf_inc_ret;
gen gssi=sf_inc_sp + sf_inc_ss + sf_inc_dis;
gen ui=sf_inc_uc;

*Property tax is only at HH level in CPS;

*All missing variables should be 0s;
foreach x in depx dep13 dep17 dep18 page sage mstat pwages dividends intrec 
	otherprop nonprop pensions gssi ui {;
	replace `x'=0 if `x'==.;
};

*Add SOI state codes;
* stateabb.dta is read from the current working directory;
merge m:1 statefip using stateabb.dta, keepus(state_soi);
tab _merge;
drop if _merge==2;
drop _merge;
tab statefip state_soi, m nol;

*Change CPS year to calendar year;
replace year=year-1;
sum year;

keep sfamid year state_soi mstat page sage depx dep13 dep17 dep18 pwages dividends
	intrec otherprop nonprop pensions gssi ui ;
rename state_soi state;

*Put in correct order;
* NOTE(review): pwages is kept above but is not named in this order list - confirm that taxsim27 reads its inputs by name and not by position;
order sfamid year state mstat  page sage depx dep13 dep17 dep18 dividends intrec
	otherprop nonprop pensions gssi ui ;
tab state year, m;

* NOTE(review): taxsim27 is an external program installed at the top of the file, it presumably needs network access to NBER - confirm;
taxsim27, full replace;

* Did Taxsim compute all calculations?;
gen taxsim_miss= fiitax==.;
tab year taxsim_miss, m;

* Give the TAXSIM outputs project names. The v-numbered names come from the full output option,;
* their meaning is taken from the target names and the original comments - verify against the TAXSIM output list;
rename fiitax sf_fedpost_taxsim;				// fed tax post all credits;
rename siitax sf_sttax_taxsim;
rename fica sf_fica_taxsim;    					// sum of employer and employee;
rename v22 sf_ctc_taxsim;
rename v23 sf_ctc_add_taxsim;
rename v25 sf_eic_taxsim;
rename v27 sf_amt_taxsim;
rename v28 sf_fedtax_taxsim;
rename v38 sf_stccc_taxsim;
rename v39 sf_steic_taxsim;

keep sfamid year *taxsim page dep1* taxsim_miss;
mdesc;
sum;

* NOTE(review): sf_amt_taxsim is not in the zero-fill list below although it enters sf_othcred_taxsim - confirm this is intended;
for any sf_fedpost_taxsim sf_sttax_taxsim sf_fica_taxsim sf_ctc_taxsim sf_ctc_add_taxsim sf_eic_taxsim sf_fedtax_taxsim ///
	sf_stccc_taxsim sf_steic_taxsim: replace X=0 if X==.;
replace sf_fica_taxsim=sf_fica_taxsim/2; 		// now only employee paid tax;
* Other credits are what is left of the gap between tax before credits plus AMT and tax after credits,;
* once the child credits and the EIC are taken out;
gen sf_othcred_taxsim = (sf_fedtax_taxsim + sf_amt_taxsim - sf_fedpost_taxsim) - (sf_ctc_taxsim + sf_ctc_add_taxsim + sf_eic_taxsim);
tab year if sf_othcred_taxsim<-1;

sum;
* NOTE(review): this summary conditions on dep17 while the zeroing two statements below conditions on dep18 - confirm which one is intended;
sum if (page<=24 | page>=65) & dep17==0;

* Federal and state EIC are set to 0 for heads aged 24 or less or 65 or more with no dependent aged 18 or less;
for any sf_eic_taxsim sf_steic_taxsim: replace X=0 if (page<=24 | page>=65) & dep18==0;

*Change calendar year to CPS year;
replace year=year+1;
cap drop depchild;

tempfile taxsim2;
save `taxsim2', replace;
restore;

* Variables already in memory keep their values in this merge, only new ones are added from the tempfile;
merge m:1 sfamid year using `taxsim2';
drop _merge;
**EK ends TAXSIM changes;


***************************************************;
* Alternative poverty for HH and big family       *;
***************************************************;
*** can't do for small families b/c family fungible transfer amounts are 0 for all related subfams;
* This section builds non-medical (fnmedtransfers) and medical (fmedtransfers) in-kind;
* transfer amounts at the CPS family level and totals them over big family heads to the;
* household (h prefix) and big family (bf prefix) levels.;

** see these are not 0;
** HH;
for any prop_tax housret fsval enrgyva: tab year if year>=1992, su(h_X);

** family;
for any f_mv_sl f_mv_fs f_houssub f_fngcare f_fngcaid: tab year if year>=1992, su(X);

** person;
for any p_emcontrb p_fed_ret p_fed_tax p_eit_cred p_statetax p_fica p_ctc_crd p_actc_crd p_stimulus 
	p_erp_cred p_mwp_cred: tab year if year>=1992, su(X);	/*EK added the last 3 variables to this list*/;
mdesc p_fed_tax if year==1988;  // missing here;
replace p_fed_tax=0 if year==1988;
for any p_inc_othpr f_fngcare f_fngcaid: replace X=0 if X==.;

** family measures;
*** to confirm: related subfams get a 0 for all years;
*** all family measures are repeated across families;
*Note: f_houssub no longer defined from 2016;
* The note above must end with a semicolon. Without it the comment runs on to the end of;
* the gen statement, fnmedtransfers is never created and the replace that follows stops;
* with a variable not found error.;
* f_houssub is multiplied by 12 before being added to the food stamp and school lunch values;
gen fnmedtransfers = f_mv_fs + f_mv_sl + f_houssub*12;
* Where f_houssub is missing the sum above is missing, so fall back to the two other parts;
	replace fnmedtransfers = f_mv_fs + f_mv_sl if f_houssub == .;
gen fmedtransfers = f_fngcare + f_fngcaid ;
*Don't have fungible value for Medicaid/are for redesign years;
** only sum up for BF head;
* Family values are repeated on every member, so only the row of the big family head is;
* allowed into the household and big family totals;
gen tmpfnmedtransfers = fnmedtransfers if bfhead==1;
egen hnmedtransfers = sum(tmpfnmedtransfers) , by(hhid);
egen bfnmedtransfers = sum(tmpfnmedtransfers) , by(bfamid);
drop tmpfnmedtransfers;
gen tmpfmedtransfers = fmedtransfers if bfhead==1;
egen hmedtransfers = sum(tmpfmedtransfers) , by(hhid);
egen bfmedtransfers = sum(tmpfmedtransfers) , by(bfamid);
drop tmpfmedtransfers;

*EK adds the following to fix issue with household values;
** Household-level amounts carried over to the big family;
* for big families, primary family  gets property taxes and housret and energy assistance;
* Rows with f_type 1 or 2 carry the household value and every other row carries 0;
* The maximum within bfamid then spreads that value to all members of the big family;
foreach hhvar in enrgyva prop_tax housret {;
	generate temp_`hhvar' = cond(f_type==1 | f_type==2, h_`hhvar', 0);
	egen bf_`hhvar' = max(temp_`hhvar'), by(bfamid);
};
drop temp_*;
*EK ends;

** EK modifies code to add TAXSIM variables 05/26/15;
* Net TAXSIM taxes and credits: federal and state taxes, amt taxes, federal EIC, federal child credits, fica;
* Taxes enter with a minus sign and credits with a plus sign;
* Everything here is a small family variable, so it has to be added up to higher levels;
generate sf_taxesnet_taxsim = - sf_fedtax_taxsim - sf_sttax_taxsim - sf_amt_taxsim + sf_eic_taxsim + sf_ctc_taxsim +
	sf_ctc_add_taxsim + sf_othcred_taxsim - sf_fica_taxsim;
* Only the row of the small family head enters the totals, so each small family counts once;
generate headtax = sf_taxesnet_taxsim if sfhead==1;
egen h_taxesnet_taxsim = sum(headtax), by(hhid);
egen bf_taxesnet_taxsim = sum(headtax), by(bfamid);
drop headtax;
**EK ends TAXSIM changes;


*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** only sum up for BF head;
gen tmpsl = f_mv_sl if bfhead==1;
egen h_sl = sum(tmpsl), by(hhid);
egen bf_sl = sum(tmpsl), by(bfamid);
drop tmpsl;
*EK ends;

*** stuff at person level;
*** employer HI contributions in all year;
egen hemcontrb = sum(p_emcontrb), by(hhid);
egen bfemcontrb = sum(p_emcontrb), by(bfamid);

*** federal retirement contributions;
*** problem in 2000, too small;
*** leave out;
egen hfed_ret = sum(p_fed_ret), by(hhid);
egen bffed_ret = sum(p_fed_ret), by(bfamid);

** taxes and credits;
*** ptaxesnet is the person-level net addition to income from taxes and credits:
	EITC enters positively, federal tax, state tax and FICA enter negatively;
gen ptaxesnet = -p_fed_tax + p_eit_cred - p_statetax - p_fica;


*EK comments some lines out, and adds other ones afterwards;

* Exclude CTC: we do this because when we compared the CPS variables to the NAS ones we realized 
that CTC (the nonrefundable portion) was included in the federal tax variable, while the ACTC 
(refundable) was not;
*replace ptaxesnet = - p_fed_tax + p_eit_cred - p_statetax - p_fica + p_ctc_crd + p_actc_crd if 
	p_ctc_crd<. & p_actc_crd<.;
*** add the refundable ACTC wherever it is non-missing;
replace ptaxesnet = - p_fed_tax + p_eit_cred - p_statetax - p_fica + p_actc_crd if p_actc_crd<.;

*Add stimulus variables;
replace ptaxesnet = ptaxesnet + p_stimulus if year==2009;
replace ptaxesnet = ptaxesnet + p_erp_cred + p_mwp_cred if year==2010;
replace ptaxesnet = ptaxesnet + p_mwp_cred if year==2011;
*EK ends;

egen htaxesnet = sum(ptaxesnet), by(hhid);
egen bftaxesnet = sum(ptaxesnet), by(bfamid);

* version without ctc/actc, in data from 05, passed in 98;
** nonrefundable 1/2 kid at first;
** big changes in 01 EGTRRA change in brackets, expanded, partially refundable;
** leave in;
gen ptaxesnet2 = -p_fed_tax + p_eit_cred - p_statetax - p_fica;

*EK adds stimulus variables;
*** (the comment above must keep its closing semicolon. Without it the 2009 stimulus line
	below is swallowed into the comment and never runs.);
replace ptaxesnet2 = ptaxesnet2 + p_stimulus if year==2009;
replace ptaxesnet2 = ptaxesnet2 + p_erp_cred + p_mwp_cred if year==2010;
replace ptaxesnet2 = ptaxesnet2 + p_mwp_cred if year==2011;
*EK ends;

egen htaxesnet2 = sum(ptaxesnet2), by(hhid);
egen bftaxesnet2 = sum(ptaxesnet2), by(bfamid);

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
egen heitc = sum(p_eit_cred), by(hhid);
egen bfeitc = sum(p_eit_cred), by(bfamid);
*** child credits = CTC + ACTC. The sum is missing when either part is missing and is then set to 0;
*** NOTE(review): tmpkidcred is not dropped in this section - confirm whether later code relies on it;
gen tmpkidcred=p_ctc_crd + p_actc_crd;
replace tmpkidcred=0 if tmpkidcred==.;
egen hkidcred  = sum(tmpkidcred), by(hhid);
egen bfkidcred = sum(tmpkidcred), by(bfamid);
*EK ends;

*** 10/10;
*** two versions;
*** 1  with exception of capital gains and losses, mimics def 2 from 00-08 exp. pov;
*** called defa8a2 in 97-99;
*** ftotval + fs + houssub (monthly * 12) + school lunch + energy value + eitc - fica - fed tax - 
state tax + cap gain - cap loss;
*** ours leaves out capital gains and losses, adds in the relevant child credits when they are available;
*** (CHANGE FROM CONFERENCE DRAFT, take out h_fed_ret, property taxes, add in energy);
*** 2 includes medical transfers, employee contribution to HI, and housing return on equity;
*** (change from conference draft, take out h_fed_ret, add in energy);
*** neither includes capital gains;
*** both include child tax credits;

*** nmedtransfers is FS + SL + housing subsidy;
*** taxes net is -statetax - fedtax + eitc -fica, + child credits, stimulus and making work pay when there;
** other version of 1 drops enrgyva, not in til 82;
*** household versions: alt1a = cash income + non-medical transfers + net taxes, alt1 adds h_enrgyva;
gen h_inc_tot_alt1a = h_inc_tot + hnmedtransfers + htaxesnet ;
gen h_inc_tot_alt1 = h_inc_tot + hnmedtransfers + h_enrgyva + htaxesnet ;
*** so two is 1 + fungible caid/care, + employer contrib to hi, + implied rental income from housing
 - property taxes;
gen h_inc_tot_alt2 = h_inc_tot + hnmedtransfers + h_enrgyva + htaxesnet - h_prop_tax + hmedtransfers
 	+ hemcontrb + h_housret;
 	
** EK modifies code to add TAXSIM variables 05/26/15;
*** same three definitions with h_taxesnet_taxsim in place of the CPS-based htaxesnet;
gen h_inc_tot_alt1a_taxsim = h_inc_tot + hnmedtransfers + h_taxesnet_taxsim;
gen h_inc_tot_alt1_taxsim = h_inc_tot + hnmedtransfers + h_enrgyva + h_taxesnet_taxsim;
gen h_inc_tot_alt2_taxsim = h_inc_tot + hnmedtransfers + h_enrgyva + h_taxesnet_taxsim -  
	h_prop_tax + hmedtransfers + hemcontrb + h_housret;
**EK ends;


*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** NEW 6/11;
*** version of income that excludes safety net and eitc;
*** usual cash income total minus ssi minus public assistance + net taxes minus (eitc and child tax 
credits) plus school lunch;
*** no child tax credits in 80-88;
*** h_inc_sp and h_inc_pa are presumably the SSI and public assistance components named above - confirm;
gen h_inc_tot_notrans = h_inc_tot - h_inc_sp - h_inc_pa + h_sl + htaxesnet - heitc - hkidcred;
*EK ends;

*EK comments this out;
/*;
*** for big families, primary family  gets property taxes and housret and energy assistance;
gen bf_inc_tot_alt1 = bf_inc_tot + bfnmedtransfers + h_enrgyva + bftaxesnet if f_type==1 | f_type==2;
*** other families do not;
replace bf_inc_tot_alt1 = bf_inc_tot + bfnmedtransfers + bftaxesnet if f_type!=1 & f_type!=2;
*/;

*EK adds this;
*** Big family versions of the alternative incomes. The bf_ energy, property tax and housing
	return variables already carry the household value for the primary family and 0 otherwise,
	so no f_type condition is needed here;
gen bf_inc_tot_alt1 = bf_inc_tot + bfnmedtransfers + bf_enrgyva + bftaxesnet;
** other version of 1 drops enrgyva, not in til 82;
gen bf_inc_tot_alt1a = bf_inc_tot + bfnmedtransfers + bftaxesnet;

gen bf_inc_tot_alt2 = bf_inc_tot + bfnmedtransfers + bf_enrgyva + bftaxesnet - bf_prop_tax + 
	bfmedtransfers + bfemcontrb + bf_housret; 
*Ek ends;

** EK modifies code to add TAXSIM variables 05/26/15;
*** same three definitions with bf_taxesnet_taxsim in place of bftaxesnet;
*** (bf_enrgyva is written out in full so this line does not depend on variable abbreviation);
gen bf_inc_tot_alt1_taxsim = bf_inc_tot + bfnmedtransfers + bf_enrgyva + bf_taxesnet_taxsim;
gen bf_inc_tot_alt1a_taxsim = bf_inc_tot + bfnmedtransfers + bf_taxesnet_taxsim;
gen bf_inc_tot_alt2_taxsim =  bf_inc_tot + bfnmedtransfers + bf_taxesnet_taxsim + bfmedtransfers 
	+ bfemcontrb + bf_enrgyva - bf_prop_tax + bf_housret;
**EK ends;


*EK comments this out;
/*;
*** for big families, primary family  gets property taxes and housret and energy assistance;
gen bf_inc_tot_alt2 = bf_inc_tot + bfnmedtransfers + h_enrgyva + bftaxesnet - h_prop_tax + 
	bfmedtransfers + bfemcontrb + h_housret if f_type==1 | f_type==2;
*** other families do not;
replace bf_inc_tot_alt2 = bf_inc_tot + bfnmedtransfers + bftaxesnet + bfmedtransfers + bfemcontrb 
	if f_type!=1 & f_type!=2;
*/;

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** NEW 6/11;
*** version of income that excludes safety net and eitc;
*** usual cash income total minus ssi minus public assistance + net taxes minus (eitc and child tax 
	credits) plus school lunch;
*** no child tax credits in 80-88;
gen bf_inc_tot_notrans = bf_inc_tot - bf_inc_sp - bf_inc_pa + bf_sl + bftaxesnet - bfeitc - bfkidcred;
*EK ends;

*** check big family amount summed across heads should be same as hh amount;
*** Each check keeps the big family income on big family heads only, totals it within the
	household and prints it next to the household income for a visual comparison in the log;

gen tmpbfinc_tot = bf_inc_tot if bfhead==1;
egen tmphhsumbfinc = sum(tmpbfinc_tot), by(hhid);

su tmphhsumbfinc h_inc_tot;
*** full variable name is used so the drop does not rely on variable abbreviation;
drop tmphhsumbfinc tmpbfinc_tot;

gen tmpbfinc_tot_alt1 = bf_inc_tot_alt1 if bfhead==1;
egen tmphhsumbfincalt1 = sum(tmpbfinc_tot_alt1), by(hhid);

su tmphhsumbfincalt1 h_inc_tot_alt1;
drop tmphhsumbfincalt1 tmpbfinc_tot_alt1;


***************************************************;
* NAS poverty for HH and big family       		  *;
***************************************************;
** family measures;
*** faminc_msi is created outside this section (presumably the NAS family income - confirm);
*** it is kept on big family heads only and then totalled within household and within big family;

gen tmpfaminc_msi = faminc_msi if bfhead==1;
egen h_inc_tot_nas = sum(tmpfaminc_msi) , by(hhid);
egen bf_inc_tot_nas = sum(tmpfaminc_msi) , by(bfamid);
drop tmpfaminc_msi;
label var h_inc_tot_nas "Total HH Income, NAS definition";
label var bf_inc_tot_nas "Total Big Family Income, NAS definition";

*** check big family amount summed across heads should be same as hh amount;
*** (printed side by side with su, nothing is asserted);

gen tmpbfinc_tot_nas = bf_inc_tot_nas if bfhead==1;
egen tmphhsumbfinc_nas = sum(tmpbfinc_tot_nas), by(hhid);

su tmphhsumbfinc_nas h_inc_tot_nas;
drop tmphhsumbfinc_nas tmpbfinc_tot_nas;


***************************************************;
*Poverty Thresholds Households                    *;
***************************************************;
*** Build the merge keys (calyear famsize kids under652person) at household level and
	attach the poverty threshold from pov_thresholds_1980_2015.dta;
gen kids = h_kidu18;
*MB;
*** don't count householder as a kid if household number of persons==kid number of persons;
*** t flags households that contain a person under 18 with p_relhd 1 or 2 and where every member is under 18;
egen t = max(p_age<18 & (p_relhd==1|p_relhd==2) & h_numpers==h_kidu18), by(hhid);
replace kids = h_kidu18 -1 if t==1;
drop t;
** MB end;
*** top-code the keys at 8 kids and 9 persons;
replace kids = 8 if kids>8 & kids~=.;
gen famsize = h_numpers;
	replace famsize = 9 if famsize>9 & famsize~=.;

*** MB added category to get other thresholds for 1/2 person elderly families;
*** under652person is indicator for <=2 persons and no elderly;
*** it is 1 if 2 person family and no elderly;
*** 0 if 2 person family and elderly;
*** -1 if >=3 person family;
gen under652person =1 if famsize<=2 & h_elderly==0;
replace under652person = 0 if famsize<=2 & h_elderly>=1 & h_elderly<.;
replace under652person = -1 if famsize>=3;

*** year of income;
drop calyear;		// EK adds this line since the previous calyear was present only for the later years;
gen calyear = year-1;

sort calyear famsize kids under652person;
merge m:1 calyear famsize kids under652person using pov_thresholds_1980_2015.dta;
**Z merge m:1 calyear famsize kids under652person using /data/brook/cycles/march/pov_thresholds_1980_2014.dta;

tab calyear _merge;


*** MB ;
*** these do not match, mark them;
*** _merge=1 means didn't match years in threshold data;
gen problemthreshold = _merge==1 ;
tab year problemthreshold ;
*** set threshold to missing for these observation;
*** after drop earlier years;
*** 394 obs don't match thresholds;
*** presumably all households with no one 18 or older (see the assert on p_age further below);
*** (the line above must end in a semicolon, otherwise the replace below is swallowed into the comment);
replace threshold=. if _merge==1;
su p_age sf_kidu18 kids famsize sf_numpers if _merge==1;
*** MB end;


*** 2s are combos not in data - no HHid for these obs;
su hhid if _merge ==2;
drop if _merge==2;
drop _merge;

*DC skips this because added later;
/*
*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
***** new for immigration 6/11;
gen hpovlt25 =   h_inc_tot<=(threshold*.25 ) if threshold<.;
gen hpov2550 =   h_inc_tot<=(threshold*.5  ) & h_inc_tot> (threshold*.25 ) if threshold<.;
gen hpov5075 =   h_inc_tot<=(threshold*.75 ) & h_inc_tot> (threshold*.50 ) if threshold<.;
gen hpov75100 =  h_inc_tot<=(threshold*1   ) & h_inc_tot> (threshold*.75 ) if threshold<.;
gen hpov100125 = h_inc_tot<=(threshold*1.25) & h_inc_tot> (threshold*1   ) if threshold<.;
gen hpov125150 = h_inc_tot<=(threshold*1.5 ) & h_inc_tot> (threshold*1.25) if threshold<.;
gen hpov150175 = h_inc_tot<=(threshold*1.75) & h_inc_tot> (threshold*1.5 ) if threshold<.;
gen hpov175200 = h_inc_tot<=(threshold*2   ) & h_inc_tot> (threshold*1.75) if threshold<.;
gen hpovgt200 =  h_inc_tot<.                & h_inc_tot> (threshold*2  ) if threshold<.;

gen tmp=hpovlt25 + hpov2550 + hpov5075 + hpov75100 + hpov100125 + hpov125150 + hpov150175 + hpov175200 + hpovgt200;
tab tmp;
drop tmp;
*EK ends;
*/
*DC ends;

*** Household poverty indicators from cash income h_inc_tot relative to the merged threshold;
*** All indicators are missing when the threshold is missing;
*** hpovcat: 1 = at or below 50 percent, 2 = above 50 up to 100, 3 = above 100 up to 200, 0 = above 200;
*** NOTE(review): unlike the alternative-income versions later in the file there is no guard on
	missing h_inc_tot here, and a missing value compares as larger than any number - confirm
	h_inc_tot is never missing;
gen hpovlt50 = h_inc_tot<=(threshold*.5) if threshold<.;
gen hpovlt150 = h_inc_tot<=(threshold*1.5) if threshold<.;
gen hpov50100 = (threshold*.5)<h_inc_tot & h_inc_tot<=(threshold) if threshold<.;
gen hpov100200 = (threshold)<h_inc_tot & h_inc_tot<=(threshold*2) if threshold<.;
gen hpovcat = 1 if hpovlt50==1;
	replace hpovcat = 2 if hpov50100==1;
	replace hpovcat = 3 if hpov100200==1;
	replace hpovcat = 0 if h_inc_tot>(threshold*2);
*** strictly below the threshold (the bins above use less-than-or-equal);
gen hbelowpov = h_inc_tot<(threshold) if threshold<.;
su hpov*;
tab hpovcat, missing;
tab year if hpovcat==.;

*DC adds cuts of poverty in small bins (09/30/2014);

*** hpovlt50 and hpovlt150 are dropped here and recreated by the loop together with every other 25-point cut;
drop hpovlt*;
forval x=25(25)400 {;
gen hpovlt`x' =   h_inc_tot<=(threshold*`x'/100) if threshold<.;
};

gen hpovgt400 =  h_inc_tot<. & h_inc_tot> (threshold*4  ) if threshold<.;

*** the two flags should add up to 1 wherever both are defined (checked by eye in the tab);
gen tmp=hpovlt400+hpovgt400;
tab tmp;
drop tmp;

*** mutually exclusive 25-point bins: above x percent and up to x+25 percent of the threshold;
forval x=25(25)375 {;
	local k= `x'+25;
	gen hpov`x'`k' = (threshold*`x'/100)<h_inc_tot & h_inc_tot<=(threshold*`k'/100) if threshold<.;
	sum hpov`x'`k';
};

*note : want to avoid rowsumming hpov50100 and hpov100200;
*** the wildcards hpov2* hpov3* hpov7* pick up hpov2550, the 200 to 400 bins and hpov75100.
	They would also pick up any other variable with those prefixes, so keep names in this
	family reserved for the bins;
egen tmp = rowtotal(hpovlt25 hpov100125 hpov125150 hpov150175 hpov175200 hpov2* hpov3* hpov5075 hpov7* hpovgt400);
count if h_inc_tot==. | threshold==.;
tab tmp;
drop tmp;

*DC ends;

** EK modifies code to add TAXSIM variables 05/26/15;
* EK makes loop so code is more straighforward;

** alt definition 1 a no LIHEAP, alt definition 1, alt definition 2 (each also for taxsim);
*** For each alternative income h_inc_tot_k this builds the same indicators as for cash income,
	named h + k + suffix. Here the indicators are also missing when the income itself is missing;
foreach k in alt1a alt1 alt2 alt1a_taxsim alt1_taxsim alt2_taxsim {;
	gen h`k'povlt50 = h_inc_tot_`k'<=(threshold*.5) if threshold<. & h_inc_tot_`k'<.;
	gen h`k'povlt150 = h_inc_tot_`k'<=(threshold*1.5) if threshold<. & h_inc_tot_`k'<.;
	gen h`k'pov50100 = (threshold*.5)<h_inc_tot_`k' & h_inc_tot_`k'<=(threshold) if threshold<. & h_inc_tot_`k'<.;
	gen h`k'pov100200 = (threshold)<h_inc_tot_`k' & h_inc_tot_`k'<=(threshold*2) if threshold<. & h_inc_tot_`k'<.;
	gen h`k'povcat = 1 if h`k'povlt50==1 & h_inc_tot_`k'<.;
		replace h`k'povcat = 2 if h`k'pov50100==1 & h_inc_tot_`k'<.;
		replace h`k'povcat = 3 if h`k'pov100200==1 & h_inc_tot_`k'<.;
		replace h`k'povcat = 0 if h_inc_tot_`k'>(threshold*2) & h_inc_tot_`k'<.;
	gen h`k'belowpov = h_inc_tot_`k'<(threshold) if threshold<. & h_inc_tot_`k'<.;
	su h`k'pov*;
	tab h`k'povcat, missing;
	tab year if h`k'povcat==.;
};

*DC adds cuts of poverty in small bins (09/30/2014);
*** Same 25-point cuts and bins as for cash income, for each alternative income. The
	povlt50 and povlt150 variables made above are dropped and recreated by the inner loop;
foreach k in alt1a alt1 alt2 alt1a_taxsim alt1_taxsim alt2_taxsim {;
	drop h`k'povlt*;
	forval x=25(25)400 {;
		gen h`k'povlt`x' =   h_inc_tot_`k'<=(threshold*`x'/100) if threshold<. & h_inc_tot_`k'<.;
	};

	*** NOTE(review): povgt400 is 0 rather than missing when the income is missing, unlike the povlt cuts;
	gen h`k'povgt400 =  h_inc_tot_`k'<. & h_inc_tot_`k'> (threshold*4) if threshold<.;

	gen tmp=h`k'povlt400+h`k'povgt400;
	tab tmp;
	drop tmp;

	forval x=25(25)375 {;
	local i= `x'+25;
		gen h`k'pov`x'`i' = (threshold*`x'/100)<h_inc_tot_`k' & h_inc_tot_`k'<=(threshold*`i'/100) if threshold<. & h_inc_tot_`k'<.;
		sum h`k'pov`x'`i';
	};

	*note : want to avoid rowsumming h`k'pov50100 and h`k'pov100200;
	egen tmp = rowtotal(h`k'povlt25 h`k'pov100125 h`k'pov125150 h`k'pov150175 h`k'pov175200 h`k'pov2* h`k'pov3* h`k'pov5075 h`k'pov7* h`k'povgt400);
	count if h_inc_tot_`k'==. | threshold==.;
	tab tmp;
	drop tmp;
};
*DC ends;


*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
** no transfers;
*** Same indicator set as above for the no-transfer income h_inc_tot_notrans. Only the
	wide bins are built here (no 25-point cuts);
*** Indicators are missing when either the threshold or the income is missing;
gen hnotranspovlt50 = h_inc_tot_notrans<=(threshold*.5) if threshold<. & h_inc_tot_notrans<.;
gen hnotranspovlt150 = h_inc_tot_notrans<=(threshold*1.5) if threshold<. & h_inc_tot_notrans<.;
gen hnotranspov50100 = (threshold*.5)<h_inc_tot_notrans & h_inc_tot_notrans<=(threshold) if threshold<. & h_inc_tot_notrans<.;
gen hnotranspov100200 = (threshold)<h_inc_tot_notrans & h_inc_tot_notrans<=(threshold*2) if threshold<. & h_inc_tot_notrans<.;
gen hnotranspovcat = 1 if hnotranspovlt50==1 & h_inc_tot_notrans<.;
replace hnotranspovcat = 2 if hnotranspov50100==1 & h_inc_tot_notrans<.;
replace hnotranspovcat = 3 if hnotranspov100200==1 & h_inc_tot_notrans<.;
replace hnotranspovcat = 0 if h_inc_tot_notrans>(threshold*2) & h_inc_tot_notrans<.;
gen hnotransbelowpov = h_inc_tot_notrans<(threshold) if threshold<. & h_inc_tot_notrans<.;
su hnotranspov*;
tab hnotranspovcat, missing;
tab year if hnotranspovcat==.;
*EK ends;

*** cross-tabulate the below-poverty flags under the different income definitions;
tab halt1belowpov halt2belowpov;
tab halt1belowpov hbelowpov;
tab halt2belowpov hbelowpov;

*DC changes this to include smaller bins;
/*;
*** make sure alternative versions not defined for 1991;
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace halt1X = . if year==1991;
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace halt2X = . if year==1991;

*EK adds this to be consistent with previous two lines;
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace halt1aX = . if year==1991;
*EK ends;
*/;

*** make sure alternative versions not defined for 1991;
*** Every household poverty measure built from an alternative income is set to missing for
	survey year 1991: the 25-point cuts, the 25-point bins, and also the wide bins, the
	category, the below-poverty flag and the above-400 flag, matching what the big family
	section and the commented-out code above do;
foreach x in alt1a alt1 alt2  alt1a_taxsim alt1_taxsim alt2_taxsim {;
	*** cumulative cuts at 25, 50, ..., 400 percent of the threshold;
	forval i=25(25)400 {;
		replace h`x'povlt`i'=. if year==1991;
	};
	*** 25-point bins run from 25-50 to 375-400. There is no 400-425 bin, so the loop stops
		at 375 instead of hiding the failure behind capture;
	forval i=25(25)375 {;
		local k= `i'+25;
		replace h`x'pov`i'`k'=. if year==1991;
	};
	*** remaining measures that the bin loops do not reach;
	foreach v in pov50100 pov100200 povcat belowpov povgt400 {;
		replace h`x'`v'=. if year==1991;
	};
};
*DC ends;
**EK ends TAXSIM changes;

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: replace hnotransX = . if  year==1991;
*EK ends;

*** MB;
*** Keep hh threshold, problem indicator, drop rest;
*** the generic names are freed so the small family and big family sections can reuse them;
rename threshold hthreshold;
rename problemthreshold hprobthreshold;

*DC changes from year>=1989 to year>=1988;

** check on people with problemthresholds;
*** survey years 2016 and later are excluded from the check (the threshold file name ends at 2015);
list year hhid bfamid sfamid p_age p_sex h_numpers if hprobthreshold==1 & year>=1988 & year < 2016;


*** noone over 18 in hh;
*** the do-file stops here if any unmatched record in those years belongs to someone 18 or older;
assert p_age<18 if hprobthreshold==1 & year>=1988 & year < 2016;
drop calyear famsize kids under652person;
*DC ends;
*** MB end;


***************************************************;
*Poverty Thresholds Small Families                *;
***************************************************;
*** Same steps as the household section, with the merge keys built from the small family
	counts (sf_kidu18, sf_numpers, sf_elderly) and income sf_inc_tot;
gen kids = sf_kidu18;
** MB;
*** don't count small family head as a kid if small family number of persons==kid number of persons;
egen t = max(p_age<18 & sfhead==1 & sf_numpers==sf_kidu18), by(sfamid);
replace kids = sf_kidu18 -1 if t==1;
drop t;
* MB end;
replace kids = 8 if kids>8 & kids~=.;
gen famsize = sf_numpers;
	replace famsize = 9 if famsize>9 & famsize~=.;

*** MB added category to get other thresholds for 1/2 person elderly families;
*** under652person is indicator for <=2 persons and no elderly;
*** it is 1 if 2 person family and no elderly;
*** 0 if 2 person family and elderly;
*** -1 if >=3 person family;
*** NOTE(review): the household section tests h_elderly==0 on the first line. With >=0 a missing
	sf_elderly also gets 1 because missing compares as larger than any number - confirm intended;
gen under652person =1 if famsize<=2 & sf_elderly>=0;
replace under652person = 0 if famsize<=2 & sf_elderly>=1 & sf_elderly<.;
replace under652person = -1 if famsize>=3;

*** year of income;
gen calyear = year-1;

sort calyear famsize kids under652person;
merge m:1 calyear famsize kids under652person using pov_thresholds_1980_2015.dta;
**Zmerge m:1 calyear famsize kids under652person using /data/brook/cycles/march/pov_thresholds_1980_2014.dta;

tab calyear _merge;

*** MB no 1s;
*** _merge=1 means didn't match years in threshold data;
gen problemthreshold = _merge==1 ;
tab year problemthreshold ;
*** set threshold to missing for these observation;
replace threshold=. if _merge==1;
su p_age sf_kidu18 kids famsize sf_numpers if _merge==1;
*** MB end;

*** Dropping combinations of people not in actual data;
*** 2s are combos not in data - no HHid for these obs;
su hhid if _merge ==2;
drop if _merge==2;
drop _merge;
*** small family poverty indicators, all missing when the threshold is missing;
gen sfpovlt50 = sf_inc_tot<=(threshold*.5) if threshold<.;
gen sfpovlt150 = sf_inc_tot<=(threshold*1.5) if threshold<.;
gen sfpov50100 = (threshold*.5)<sf_inc_tot & sf_inc_tot<=(threshold) if threshold<.;
*** threshold<. is an if-condition here as in the sibling lines, so the flag is missing
	(not 0) when the threshold is missing;
gen sfpov100200 = (threshold)<sf_inc_tot & sf_inc_tot<=(threshold*2) if threshold<.;
gen sfpovcat = 1 if sfpovlt50==1;
	replace sfpovcat = 2 if sfpov50100==1;
	replace sfpovcat = 3 if sfpov100200==1;
	replace sfpovcat = 0 if sf_inc_tot>(threshold*2);
gen sfbelowpov = sf_inc_tot<(threshold) if threshold<.;
su sfpov*;
tab sfpovcat, missing;
tab year if sfpovcat==.;

** no alt definitions;

* MB;
*** Keep sf threshold, problem indicator, drop rest;
rename threshold sfthreshold;
rename problemthreshold sfprobthreshold;
drop calyear famsize kids under652person;

*DC changes from year>=1989 to year>=1988;
** check on people with problemthresholds;
list year hhid bfamid sfamid p_age p_sex sf_numpers if sfprobthreshold==1 & year>=1988 & year < 2016;
*** noone over 18 in sf;
assert p_age<18 if sfprobthreshold==1 & year>=1988 & year < 2016;
* MB end;
*DC ends;
***************************************************;
*Poverty Thresholds Big Families                  *;
***************************************************;
*** Same steps as the household section, with the merge keys built from the big family
	counts (bf_kidu18, bf_numpers, bf_elderly) and income bf_inc_tot;
gen kids = bf_kidu18;
** MB;
*** don't count big family head as a kid if big family number of persons==kid number of persons;
egen t = max(p_age<18 & bfhead==1 & bf_numpers==bf_kidu18), by(bfamid);
replace kids = bf_kidu18 -1 if t==1;
drop t;
* MB end;
replace kids = 8 if kids>8 & kids~=.;
gen famsize = bf_numpers;
replace famsize = 9 if famsize>9 & famsize~=.;

*** MB added category to get other thresholds for 1/2 person elderly families;
*** under652person is indicator for <=2 persons and no elderly;
*** it is 1 if 2 person family and no elderly;
*** 0 if 2 person family and elderly;
*** -1 if >=3 person family;
*** NOTE(review): the household section tests h_elderly==0 on the first line. With >=0 a missing
	bf_elderly also gets 1 because missing compares as larger than any number - confirm intended;
gen under652person =1 if famsize<=2 & bf_elderly>=0;
replace under652person = 0 if famsize<=2 & bf_elderly>=1 & bf_elderly<.;
replace under652person = -1 if famsize>=3;

*** year of income;
gen calyear = year-1;

sort calyear famsize kids under652person;
merge m:1 calyear famsize kids under652person using pov_thresholds_1980_2015.dta;
**ZZmerge m:1 calyear famsize kids under652person using /data/brook/cycles/march/pov_thresholds_1980_2014.dta;

tab calyear _merge;
*** MB most of 1s;
*** some small share are families with famsize=number of kids under 18;
*** these do not match, mark them;
*** _merge=1 means didn't match years in threshold data;
gen problemthreshold = _merge==1 ;
tab year problemthreshold ;
*** set threshold to missing for these observation;
*** after drop later years;
*** drop if year>=1989;
replace threshold=. if _merge==1;
su p_age bf_kidu18 kids famsize bf_numpers if _merge==1;
*** MB end;

*** Dropping combinations of people not in actual data no hhids;
su hhid if _merge==2;
drop if _merge==2;
drop _merge;
*** big family poverty indicators from cash income, all missing when the threshold is missing;
*** bfpovcat: 1 = at or below 50 percent, 2 = above 50 up to 100, 3 = above 100 up to 200, 0 = above 200;
gen bfpovlt50 = bf_inc_tot<=(threshold*.5) if threshold<.;
gen bfpovlt150 = bf_inc_tot<=(threshold*1.5) if threshold<.;
gen bfpov50100 = (threshold*.5)<bf_inc_tot & bf_inc_tot<=(threshold) if threshold<.;
gen bfpov100200 = (threshold)<bf_inc_tot & bf_inc_tot<=(threshold*2) if threshold<.;
gen bfpovcat = 1 if bfpovlt50==1;
	replace bfpovcat = 2 if bfpov50100==1;
	replace bfpovcat = 3 if bfpov100200==1;
	replace bfpovcat = 0 if bf_inc_tot>(threshold*2);
gen bfbelowpov = bf_inc_tot<(threshold) if threshold<.;
su bfpov*;
tab bfpovcat, missing;
tab year if bfpovcat==.;

**** MB;
**** check HH info that big family poverty is poverty;
**** very close, off for 626 observations;
*** f_famlis is created outside this section (presumably the CPS-supplied poverty status - confirm);
tab f_famlis bfbelowpov;

** EK modifies code to add TAXSIM variables 05/26/15;
* EK makes loop so code is more straighforward;

** alt definition 1 a no LIHEAP, alt definition 1, alt definition 2 (each also for taxsim);
*** For each alternative income bf_inc_tot_k this builds the wide-bin indicators named
	bf + k + suffix. They are missing when either the threshold or the income is missing;
*** No 25-point cuts are built for big families;
foreach k in alt1a alt1 alt2 alt1a_taxsim alt1_taxsim alt2_taxsim {;
	qui gen bf`k'povlt50 = bf_inc_tot_`k'<=(threshold*.5) if threshold<. & bf_inc_tot_`k'<.;
	qui gen bf`k'povlt150 = bf_inc_tot_`k'<=(threshold*1.5) if threshold<. & bf_inc_tot_`k'<.;
	qui gen bf`k'pov50100 = (threshold*.5)<bf_inc_tot_`k' & bf_inc_tot_`k'<=(threshold) if threshold<. 
		& bf_inc_tot_`k'<.;
	qui gen bf`k'pov100200 = (threshold)<bf_inc_tot_`k' & bf_inc_tot_`k'<=(threshold*2) if threshold<. 
		& bf_inc_tot_`k'<.;
	qui gen bf`k'povcat = 1 if bf`k'povlt50==1 & bf_inc_tot_`k'<.;
		qui replace bf`k'povcat = 2 if bf`k'pov50100==1 & bf_inc_tot_`k'<.;
		qui replace bf`k'povcat = 3 if bf`k'pov100200==1 & bf_inc_tot_`k'<.;
		qui replace bf`k'povcat = 0 if bf_inc_tot_`k'>(threshold*2) & bf_inc_tot_`k'<.;
	qui gen bf`k'belowpov = bf_inc_tot_`k'<(threshold) if threshold<. & bf_inc_tot_`k'<.;
	su bf`k'pov*;
	tab bf`k'povcat, missing;
	tab year if bf`k'povcat==.;
};


*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
** no transfers;
*** same indicator set for the no-transfer income bf_inc_tot_notrans;
gen bfnotranspovlt50 = bf_inc_tot_notrans<=(threshold*.5) if threshold<. & bf_inc_tot_notrans<.;
gen bfnotranspovlt150 = bf_inc_tot_notrans<=(threshold*1.5) if threshold<. & bf_inc_tot_notrans<.;
gen bfnotranspov50100 = (threshold*.5)<bf_inc_tot_notrans & bf_inc_tot_notrans<=(threshold) if 
	threshold<. & bf_inc_tot_notrans<.;
gen bfnotranspov100200 = (threshold)<bf_inc_tot_notrans & bf_inc_tot_notrans<=(threshold*2) if 
	threshold<. & bf_inc_tot_notrans<.;
gen bfnotranspovcat = 1 if bfnotranspovlt50==1 & bf_inc_tot_notrans<.;
replace bfnotranspovcat = 2 if bfnotranspov50100==1 & bf_inc_tot_notrans<.;
replace bfnotranspovcat = 3 if bfnotranspov100200==1 & bf_inc_tot_notrans<.;
replace bfnotranspovcat = 0 if bf_inc_tot_notrans>(threshold*2) & bf_inc_tot_notrans<.;
gen bfnotransbelowpov = bf_inc_tot_notrans<(threshold) if threshold<. & bf_inc_tot_notrans<.;
su bfnotranspov*;
tab bfnotranspovcat, missing;
tab year if bfnotranspovcat==.;
*EK ends;

*** cross-tabulate the below-poverty flags under the different income definitions;
tab bfalt1belowpov bfalt2belowpov;
tab bfalt1belowpov bfbelowpov;
tab bfalt2belowpov bfbelowpov;

*** make sure alternative versions not defined for 1991;
*** all six measures of each alternative definition are set to missing for survey year 1991;
foreach k in alt1a alt1 alt2 alt1a_taxsim alt1_taxsim alt2_taxsim {;
	for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: qui replace bf`k'X = . if year==1991;
};

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
for any povlt50 povlt150 pov50100 pov100200 povcat belowpov: qui replace bfnotransX = . if year==1991;
*EK ends;
**EK ends TAXSIM changes;

* MB;
*** Keep bf threshold, problem indicator, drop rest;
rename threshold bfthreshold;
rename problemthreshold bfprobthreshold;
drop calyear famsize kids under652person;

*DC changes from year>=1989 to year>=1988;
** check on people with problemthresholds;
*** survey years 2016 and later are excluded from the check (the threshold file name ends at 2015);
list year hhid bfamid sfamid p_age p_sex bf_numpers if bfprobthreshold==1 & year>=1988 & year < 2016;
*** noone over 18 in bf;
*** the do-file stops here if any unmatched record in those years belongs to someone 18 or older;
assert p_age<18 if bfprobthreshold==1 & year>=1988 & year < 2016;
*DC ends;
* MB end;

*EK adds this;

 #delimit cr
********************************************************************************
* KR added this split. Taxsim requires an FTP connection, which does not work in
* batch mode. Because the code above takes a long time to run, the data are saved
* here, one file per survey year, and the remaining steps are run in batch.
********************************************************************************
foreach yr of numlist 1988/2017 {
	* write out the records of this survey year only, then bring the full data back
	local outfile "./marcps88on/temp/taxsim`yr'.dta"
	preserve
	keep if year == `yr'
	quietly compress
	save "`outfile'", replace
	restore
	* discard the year just saved so the data in memory shrink on every pass
	drop if year == `yr'
}
capture log close
* The rest of the original marchcpsfamilyhh runs as
* marchcpsfamilyhh_aftertaxsim.do.
