/*File created by Jessamyn Schaller (JS) and updated/modified by Marianne Bitler (MB).
Elira Kuka (EK) updated this file to include 2011 and 2012 data.
NOTICE: EK wrote EK_CHECK in front of things still needed checking or thought. Plus EK marked as *EK
all her changes and comments that are OK.
Last update: 3/14/2013
MB modified 5/15/2013 to change definitions for young adult living arrangements
EK added 2013 CPS data on 12/09/13
DC added 2014 CPS data on 29/09/14
EK added TAXSIM taxes, credits, and poverty rates 05/22/15.
DC added UNICON data for CPS year 1988B and PROP_TAX and HOUSRET for CPS year 1991 on 8/27/2015
DC added 2015 CPS data on 12/20/2015
DC added redesigned 2014 CPS data on 2/15/2016
EK changed on 8/28/16 the name of variables *inc_cs to *inc_othpr to make sure 
we are aware that both CS/Alimony and other stuff is there.
KR added 2015 and 2016 files 4/7/2017
KR added SPM threshold from Bitler, Hoynes, Kuka
*/
clear all
set more off, perm

capture log close

*Install TAXSIM ado file (if necessary)
*KR updated this 2/6/2018 to run taxsim27 (shouldn't need for this file since taxsim already run)
net from "http://www.nber.org/stata"
net describe taxsim27
net install taxsim27, replace

*EK adds local paths
local march "/accounts/projects/hoynes/cycles/march/"
local spm_raw "/accounts/projects/hoynes/cycles/march/spm_raw"
local nas_raw "/accounts/projects/hoynes/cycles/march/nas_raw"
*EK ends

cd "`march'"

*** NOTE STUFF WITH Z in front is for things to change at CURIE (MB) machine;
log using marchcpsfamilyhh_aftertaxsim.log, replace
*Z log using marchcpsfamilyhh-curie.log, replace;

!date

/*Added 2/6/2018: Anchored SPM poverty*/
**First create thresholds to be used in SPM poverty**
*************************
*** Create poverty rates with ANCHORED SPM thresholds
*************************

*** Create state-family size thresholds for 2014 SPM
* Purpose: build a state x family-size x number-of-kids table of 2014 SPM
* thresholds, replicate it for every year 1980-2016, scale it by CPI-U-RS
* relative to 2014, and store the result in the tempfile `spmanchor'.
use "`spm_raw'/spmresearch2014.dta", clear
tempfile spmanchor

* Sanity check on the size variables (names are abbreviated here; presumably
* kids + adults = persons -- confirm against the SPM research file codebook).
assert spmu_numper==spmu_numk+spmu_numa		// ok
*drop if spmu_numk==0
keep spmu_num* gestfips spmu_povth* marsupwt
* Top-code: 5 or more persons -> 5, 4 or more kids -> 4.
* NOTE(review): a missing count also satisfies ">5" / ">4" in Stata and would
* be top-coded too -- confirm these variables are never missing.
replace spmu_nump=5 if spmu_nump>5
replace spmu_numk=4 if spmu_numk>4
gen num=1

* Weighted mean threshold per cell; num is the unweighted (rawsum) count of
* records in each cell and is used only for the diagnostics below.
collapse (mean) spmu_povth (rawsum) num [pw=marsupwt], by(gestfips spmu_numper spmu_numk)
tab spmu_nump spmu_numk, m
sum num, d
tab spmu_nump spmu_numk if num>1000, m
drop num

* Rename variables
rename gestfips statefip
rename spmu_povth spmthresh2014

*** Now create a dataset for all years
* Each cell is copied 37 times; within a cell the copies are numbered so that
* year runs from 2016 (first copy) down to 1980 (last copy).
gen year=2016
expand 37
bysort statefip spmu_numper spmu_numk: replace year=year+1-_n
	tab year, mi

* Add CPI-U-RS
//  CPI-U-RS data taken by EK in April 2016 from http://www.bls.gov/cpi/cpiursfe1977-2015.pdf.
* mercpi==2 marks CPI years that have no threshold rows; those are dropped.
merge m:1 year using cpi_urs.dta, gen (mercpi)
tab year if mercpi==2, m
drop if mercpi==2
drop mercpi
*Get CPI in 2014 and adjust for inflation
	summ cpi_urs if year == 2014, d
		local cpi2014 = r(mean) //doesn't matter what # we take -- just 1 value
* Anchored threshold = 2014 threshold scaled by CPI(year)/CPI(2014).
gen spmthreshanchor=spmthresh2014*cpi_urs/`cpi2014'
table year, c(mean spmthreshanchor mean spmthresh2014 mean cpi_urs)
* Renamed to calyear because the later merge onto the yearly files keys on
* calyear statefip spmu_numper spmu_numkids.
rename year calyear
save `spmanchor'

#delimit ;

*** Changelog;
*** was marchcpsfinal;
*** MB also changes to do family and HH stuff;

*** MB changes to use her dct files in mb directory;
*** has added some variables including total hh income for checking purposes;
*** All stuff related to small/big/CPS families is MB;
*** MB changed concept of head to be at least 15 years old;
*** MB removes some sample selection stuff because it is screwing something up;
*** MB adds bf alternate poverty measures;
*** MB fixes so ALL $ amounts are in real terms;
* EK adds r_ to all terms in real values;
* EK adds all NAS/SPM related material;
* EK adds all immigration code from the marchcpsfamilyhh_immig.do file;

*** 10/12;
*** MB fixes to make unique ID so won't be issue with collapsing;
**  Note float is unique up to about 8 million;
**  If any of ids go above 8 million, we should switch type;
**  from float to double precision;
** Note MB checked and sfamid for 1980-88 is under 1 million;
** so we can get a unique set of IDS cross year by adding 1 million;
** to 89-201X ones;
*** Also MB fixes error in SF weights;

clear;
clear matrix;
set more off;


*This will be the master dofile that takes the raw March CPS data and *;
*creates the collapsed data file for years 1977-2014.*;
*on Baker*;
*Z on curie;
*Z cd /data/brook/cycles/march;


*** commented out for debugging;
*** all rerun recently by hand;
*This takes raw CPS files and turns them into marcps7788.dta*;
*do infile;
*This takes NBER CPS files and turns them into marcps8914.dta*;
*do lateryears_recode.do;
* This takes thresholds from JS and updates for elderly HH *;
*do make-pov-threshold-right.do // EK: Not used anymore; 
* This takes guidelines from JS and updates for pre-82 and AK/HI *;
*do update_guidelines_pre-82.do;

clear;
set mem 10g;

*these files created in marchcpsfamilyhh_taxsim.do;
forvalues y = 1988/2009 {;
	use "./marcps88on/temp/taxsim`y'.dta", clear;
	
***************************************************;
*SPM Poverty Thresholds              *;
***************************************************;	
*** Anchored SPM thresholds are keyed on the income (calendar) year = survey year - 1;
g calyear = year - 1;
*** Merge these thresholds;
*** Merge keys are built from household size and number of kids in the household;
gen spmu_numper=h_numpers;
*** h_kids is created from h_kidu18 only when the input file does not already carry it;
cap gen h_kids = h_kidu18; 
gen spmu_numkids=h_kids;
*** Top-code the keys as in the threshold file (5 or more persons, 4 or more kids);
*** Stata sorts missing above every number, so exclude missing explicitly;
*** (same guard as used for kids and famsize in the alt threshold sections);
*** otherwise a missing count would be recoded to the top code and matched to a threshold;
replace spmu_numper=5 if spmu_numper>5 & spmu_numper<.;
replace spmu_numkids=4 if spmu_numkids>4 & spmu_numkids<.;
*** Full variable names are used in the key list so the merge does not rely on abbreviation;
merge m:1 calyear statefip spmu_numper spmu_numkids using `spmanchor', gen(threanch);
tab calyear threanch, m;
bysort threanch: tab spmu_numper spmu_numkids, m;
*** threanch==2 are threshold cells with no matching persons in this year;
drop if threanch==2;

/*KR commented out. Have SPM thresholds for all fam types, add to income as needed
* Poverty rates for main definitions of income
foreach x in ma mapr maprpub maprpubtax maprpubtax_md  mapr_moop maprpubtax_moop {;
	forvalues X = 50(50)200 {;
	gen hp`X'_`x'_aspm= h_inc_`x' <= `X' * spmthreshanchor/100;
		replace hp`X'_`x'_aspm = . if spmthreshanchor==. | h_inc_`x'==.;
};
};
drop spmu_num*;			// drop the ones with cutoff;
*/



***************************************************;
*NAS Poverty Thresholds Big Families              *;
***************************************************;

* Confirm thresholds for NAS family are at the big family level;
* Sum of the head thresholds within each big family;
* (equals the head threshold when the big family has exactly one head);
gen tmpthresh_cega = thresh_cega if bfhead==1;
egen bf_thresh_cega = sum(tmpthresh_cega) , by(bfamid);

* diff==1 flags persons whose own threshold differs from the big family value;
gen diff= bf_thresh_cega != thresh_cega;
tab year diff, missing;
tab f_type diff, missing;

* Create poverty variables;
* Indicators are missing when either the threshold or NAS income is missing;
* Stata sorts missing above every number, so without the income guard a missing;
* income would be coded 0 (not poor) and would land in category 0 of bfnascat;
* This matches the guards used for the alt1 and alt2 poverty measures;
gen bfnaslt50 = bf_inc_tot_nas<=(thresh_cega*.5) if thresh_cega<. & bf_inc_tot_nas<.;
gen bfnaslt150 = bf_inc_tot_nas<=(thresh_cega*1.5) if thresh_cega<. & bf_inc_tot_nas<.;
gen bfnas50100 = (thresh_cega*.5)<bf_inc_tot_nas & bf_inc_tot_nas<=(thresh_cega) if thresh_cega<. & bf_inc_tot_nas<.;
gen bfnas100200 = (thresh_cega)<bf_inc_tot_nas & bf_inc_tot_nas<=(thresh_cega*2) if thresh_cega<. & bf_inc_tot_nas<.;
* bfnascat: 1 = at or below 50 pct, 2 = 50 to 100 pct, 3 = 100 to 200 pct, 0 = above 200 pct;
gen bfnascat = 1 if bfnaslt50==1;
	replace bfnascat = 2 if bfnas50100==1;
	replace bfnascat = 3 if bfnas100200==1;
	replace bfnascat = 0 if bf_inc_tot_nas>(thresh_cega*2) & bf_inc_tot_nas<.;
gen bfbelownas = bf_inc_tot_nas<(thresh_cega) if thresh_cega<. & bf_inc_tot_nas<.;
su bfnas*;
tab bfnascat, missing;
tab year if bfnascat==.;

*EK ends;

/*; 
*** comment out for now;
***************************************************;
*Poverty Thresholds Census Families               *;
***************************************************;
**** CAUTION we have determined this measure is for big familes;
**** EVEN THOUGH FAMILY INCOME MATCHES pre-89 convention that ;
**** Primary gets own+related subfam, and rel subfam gets own;

gen cfthreshold = bfthreshold if year>=1989;

gen cfprobthreshold = bfprobthreshold if year>=1989;

********** Note USING BIG FAMILY CONCEPT;
**** CAUTION IS INCONSISTENT WITH family income and number of people canned;
gen cfpovlt50 = bfpovlt50;
gen cfpov50100 = bfpov50100;
gen cfpov100200 = bfpov100200;
gen cfpovcat = bfpovcat;
gen cfbelowpov = bfbelowpov;

su cfpov*;
tab cfpovcat, missing;
tab year if cfpovcat==.;

*** check this with official poverty measure in CPS;
*** almost 100% on, survey year 1989 forward;
*** off for 626 obs;
tab f_famlis cfbelowpov if year >=1989;

*** show nonmatches;
tab year f_type if f_famlis==1  & cfbelowpov==1 ;
tab year f_type if f_famlis!=1  & cfbelowpov==0 ;
tab f_famlis cfbelowpov;
*** pause;
*** problematic observations;
*** small number 626;
list hhid year f_type *threshold *_inc_tot f_famlis *belowpov p_age if f_famlis==1 & cfbelowpov==0 & cfbelowpov==0 & year>=1989;

tab year f_famlis if cfbelowpov==.;

*** for debugging;
*** pause on;
*** pause;
* end of comment out;
*/;

***************************************************;
*Alt Experimental Poverty Thresholds              *;
***************************************************;
*** These are created in altpovthreshold-05.do;
*** from 2005 exp. pov. report;
*** 3 scale equivalence but still mostly transfers/taxes;
*** backed up by CPI U;
*** could add CPI U RS but didn't;

gen kids = h_kidu18;
*MB;
*** don't count householder as a kid if household number of persons==kid number of persons;
egen t = max(p_age<18 & (p_relhd==1|p_relhd==2) & h_numpers==h_kidu18), by(hhid);
replace kids = h_kidu18 -1 if t==1;
drop t;
** MB end;
replace kids = 8 if kids>8 & kids~=.;
gen famsize = h_numpers;
	replace famsize = 9 if famsize>9 & famsize~=.;

*** no extra old/young category for 1/2 person families;

*** year of income;
gen incyear = year-1;

sort incyear kids famsize;
merge m:1 incyear kids famsize using adjusted_exp_thresholds.dta;
**Z merge m:1 incyear kids famsize using /data/brook/cycles/march/adjusted_exp_thresholds.dta;

tab incyear _merge;

*** MB ;
*** these do not match, mark them;
*** _merge=1 means didn't match years in altthreshcpiu data;
gen problemaltthreshcpiu = _merge==1 ;
tab year problemaltthreshcpiu;
*** set altthreshcpiu to missing for these observation;
*** after drop earlier years;
*** XXX obs don't match altthreshcpius;
*** all households with (note left unfinished in the original);
*** NB every comment must end with the delimiter or it absorbs the command that follows;
replace altthreshcpiu=. if _merge==1;
su p_age sf_kidu18 kids famsize sf_numpers if _merge==1;
*** MB end;

*** Dropping combinations of people not in actual data no hhids;
su hhid if _merge==2;
drop if _merge==2;
drop _merge;

*** no regular poverty with this version of thresholds;

** alt definition 1, alt altthreshcpiu;
gen halt1etpovlt50 = h_inc_tot_alt1<=(altthreshcpiu*.5) if altthreshcpiu<. & h_inc_tot_alt1<.;
gen halt1etpovlt150 = h_inc_tot_alt1<=(altthreshcpiu*1.5) if altthreshcpiu<. & h_inc_tot_alt1<.;
gen halt1etpov50100 = (altthreshcpiu*.5)<h_inc_tot_alt1 & h_inc_tot_alt1<=(altthreshcpiu) if altthreshcpiu<. & h_inc_tot_alt1<.;
gen halt1etpov100200 = (altthreshcpiu)<h_inc_tot_alt1 & h_inc_tot_alt1<=(altthreshcpiu*2) if altthreshcpiu<. & h_inc_tot_alt1<.;
gen halt1etpovcat = 1 if halt1etpovlt50==1 & h_inc_tot_alt1<.;
	replace halt1etpovcat = 2 if halt1etpov50100==1 & h_inc_tot_alt1<.;
	replace halt1etpovcat = 3 if halt1etpov100200==1 & h_inc_tot_alt1<.;
	replace halt1etpovcat = 0 if h_inc_tot_alt1>(altthreshcpiu*2) & h_inc_tot_alt1<.;
gen halt1etbelowpov = h_inc_tot_alt1<(altthreshcpiu) if altthreshcpiu<. & h_inc_tot_alt1<.;
su halt1etpov*;
tab halt1etpovcat, missing;
tab year if halt1etpovcat==.;

** alt definition 2;
gen halt2etpovlt50 = h_inc_tot_alt2<=(altthreshcpiu*.5) if altthreshcpiu<. & h_inc_tot_alt2<.;
gen halt2etpovlt150 = h_inc_tot_alt2<=(altthreshcpiu*1.5) if altthreshcpiu<. & h_inc_tot_alt2<.;
gen halt2etpov50100 = (altthreshcpiu*.5)<h_inc_tot_alt2 & h_inc_tot_alt2<=(altthreshcpiu) if altthreshcpiu<. & h_inc_tot_alt2<.;
gen halt2etpov100200 = (altthreshcpiu)<h_inc_tot_alt2 & h_inc_tot_alt2<=(altthreshcpiu*2) if altthreshcpiu<. & h_inc_tot_alt2<.;
gen halt2etpovcat = 1 if halt2etpovlt50==1 & h_inc_tot_alt2<.;
	replace halt2etpovcat = 2 if halt2etpov50100==1 & h_inc_tot_alt2<.;
	replace halt2etpovcat = 3 if halt2etpov100200==1 & h_inc_tot_alt2<.;
	replace halt2etpovcat = 0 if h_inc_tot_alt2>(altthreshcpiu*2) & h_inc_tot_alt2<.;
gen halt2etbelowpov = h_inc_tot_alt2<(altthreshcpiu) if altthreshcpiu<. & h_inc_tot_alt2<.;
su halt2etpov*;
tab halt2etpovcat, missing;
tab year if halt2etpovcat==.;

tab halt1etbelowpov halt2etbelowpov;
tab halt1etbelowpov hbelowpov;
tab halt2etbelowpov hbelowpov;


*** MB;
*** Keep hh altthreshcpiu, problem indicator, drop rest;
rename altthreshcpiu haltthreshcpiu;
rename problemaltthreshcpiu hprobaltthreshcpiu;
drop incyear famsize kids;

*DC changes from year>=1989 to year>=1988;
** check on people with problemaltthreshcpius;
list year hhid bfamid sfamid p_age p_sex h_numpers if hprobaltthreshcpiu==1 & year>=1988;
*** noone over 18 in hh;
assert p_age<18 if hprobaltthreshcpiu==1 & year>=1988 & year < 2016;
*DC ends;
*** MB end;

***************************************************************************;
*No Alt Experimental Poverty Thresholds for Small Families                *;
***************************************************************************;

***************************************************;
*Alt Experimental Poverty Thresholds Big Families *;
***************************************************;
*** These are created in altpovthreshold-05.do;
*** from 2005 exp. pov. report;
*** 3 scale equivalence but still mostly transfers/taxes;
*** backed up by CPI U;
*** could add CPI U RS but didn't;

gen kids = bf_kidu18;
** MB;
*** don't count big family head as a kid if big family number of persons==kid number of persons;
egen t = max(p_age<18 & bfhead==1 & bf_numpers==bf_kidu18), by(bfamid);
replace kids = bf_kidu18 -1 if t==1;
drop t;
* MB end;
replace kids = 8 if kids>8 & kids~=.;
gen famsize = bf_numpers;
	replace famsize = 9 if famsize>9 & famsize~=.;

*** no extra old/young category for 1/2 person families;

*** year of income;
gen incyear = year-1;

sort incyear kids famsize;
merge m:1 incyear kids famsize using adjusted_exp_thresholds.dta;
**ZZmerge m:1 incyear kids famsize using /data/brook/cycles/march/adjusted_exp_thresholds.dta;

tab incyear _merge;
*** MB most of 1s;
*** some small share are families with famsize=number of kids under 18;
*** these do not match, mark them;
*** _merge=1 means didn't match years in altthreshcpiu data;
gen problemaltthreshcpiu = _merge==1 ;
tab year problemaltthreshcpiu;
*** set altthreshcpiu to missing for these observation;
*** after drop later years;
*** drop if year>=1989;
replace altthreshcpiu=. if _merge==1;
su p_age bf_kidu18 kids famsize bf_numpers if _merge==1;
*** MB end;

*** Dropping combinations of people not in actual data no hhids;
su hhid if _merge==2;
drop if _merge==2;
drop _merge;

*** no regular poverty with this version of thresholds;

gen bfalt1etpovlt50 = bf_inc_tot_alt1<=(altthreshcpiu*.5) if altthreshcpiu<. & bf_inc_tot_alt1<.;
gen bfalt1etpovlt150 = bf_inc_tot_alt1<=(altthreshcpiu*1.5) if altthreshcpiu<. & bf_inc_tot_alt1<.;
gen bfalt1etpov50100 = (altthreshcpiu*.5)<bf_inc_tot_alt1 & bf_inc_tot_alt1<=(altthreshcpiu) if altthreshcpiu<. & bf_inc_tot_alt1<.;
gen bfalt1etpov100200 = (altthreshcpiu)<bf_inc_tot_alt1 & bf_inc_tot_alt1<=(altthreshcpiu*2) if altthreshcpiu<. & bf_inc_tot_alt1<.;
gen bfalt1etpovcat = 1 if bfalt1etpovlt50==1 & bf_inc_tot_alt1<.;
	replace bfalt1etpovcat = 2 if bfalt1etpov50100==1 & bf_inc_tot_alt1<.;
	replace bfalt1etpovcat = 3 if bfalt1etpov100200==1 & bf_inc_tot_alt1<.;
	replace bfalt1etpovcat = 0 if bf_inc_tot_alt1>(altthreshcpiu*2) & bf_inc_tot_alt1<.;
gen bfalt1etbelowpov = bf_inc_tot_alt1<(altthreshcpiu) if altthreshcpiu<. & bf_inc_tot_alt1<.;
su bfalt1etpov*;
tab bfalt1etpovcat, missing;
tab year if bfalt1etpovcat==.;

** alt definition 2;
gen bfalt2etpovlt50 = bf_inc_tot_alt2<=(altthreshcpiu*.5) if altthreshcpiu<. & bf_inc_tot_alt2<.;
gen bfalt2etpovlt150 = bf_inc_tot_alt2<=(altthreshcpiu*1.5) if altthreshcpiu<. & bf_inc_tot_alt2<.;
gen bfalt2etpov50100 = (altthreshcpiu*.5)<bf_inc_tot_alt2 & bf_inc_tot_alt2<=(altthreshcpiu) if altthreshcpiu<. & bf_inc_tot_alt2<.;
gen bfalt2etpov100200 = (altthreshcpiu)<bf_inc_tot_alt2 & bf_inc_tot_alt2<=(altthreshcpiu*2) if altthreshcpiu<. & bf_inc_tot_alt2<.;
gen bfalt2etpovcat = 1 if bfalt2etpovlt50==1 & bf_inc_tot_alt2<.;
	replace bfalt2etpovcat = 2 if bfalt2etpov50100==1 & bf_inc_tot_alt2<.;
	replace bfalt2etpovcat = 3 if bfalt2etpov100200==1 & bf_inc_tot_alt2<.;
	replace bfalt2etpovcat = 0 if bf_inc_tot_alt2>(altthreshcpiu*2) & bf_inc_tot_alt2<.;
gen bfalt2etbelowpov = bf_inc_tot_alt2<(altthreshcpiu) if altthreshcpiu<. & bf_inc_tot_alt2<.;
su bfalt2etpov*;
tab bfalt2etpovcat, missing;
tab year if bfalt2etpovcat==.;


tab bfalt1etbelowpov bfalt2etbelowpov;
tab bfalt1etbelowpov bfbelowpov;
tab bfalt2etbelowpov bfbelowpov;

* MB;
*** Keep bf altthreshcpiu, problem indicator, drop rest;
rename altthreshcpiu bfaltthreshcpiu;
rename problemaltthreshcpiu bfprobaltthreshcpiu;
drop incyear famsize kids;

*DC changes from year>=1989 to year>=1988;
** check on people with problemaltthreshcpius;
list year hhid bfamid sfamid p_age p_sex bf_numpers if bfprobaltthreshcpiu==1 & year>=1988;
*** noone over 18 in bf;
assert p_age<18 if bfprobaltthreshcpiu==1 & year>=1988 & year < 2016;
*DC ends;
* MB end;

*EK modifies this line so we don't drop components of income anymore;
drop fnmedtransfers fmedtransfers bfnmedtransfers bfmedtransfers bfemcont bffed_ret hnmedtransfers 
hmedtransfers hemcont hfed_ret ptaxesnet* htaxesnet* p_mvcaid p_mvcare p_ted_val;

***************************************************;
*Poverty Guidelines                               *;
***************************************************;
*** MB adds AK/HI and pre-82;
*** merge on previous year also, as guidelines change typically in March;
gen byte ak = statefip==2;
gen byte hi = statefip==15;
cap gen calyear = year-1;
sort calyear ak hi;
merge m:1 calyear ak hi using pov_guidelines_1977_2016.dta;
**Z merge m:1 calyear ak hi using /data/brook/cycles/march/pov_guidelines_1977_2014.dta;
tab calyear _merge;
tab year _merge;

*** 1s are;
*** for debugging;
*** pause on;
*** pause;
drop if _merge==2;
drop _merge;

*** Households;
gen hpovguide_amt = fpl1;
replace hpovguide_amt = fpl1 + fpladdl*(h_numpers-1) if h_numpers>1;
gen hpovguide_ratio = h_inc_tot/hpovguide_amt;


*** Small Families;
gen sfpovguide_amt = fpl1;
replace sfpovguide_amt = fpl1 + fpladdl*(sf_numpers-1) if sf_numpers>1;
gen sfpovguide_ratio = sf_inc_tot/sfpovguide_amt;

*** Big Families;
gen bfpovguide_amt = fpl1;
replace bfpovguide_amt = fpl1 + fpladdl*(bf_numpers-1) if bf_numpers>1;
gen bfpovguide_ratio = bf_inc_tot/bfpovguide_amt;

/*; 
*** comment out for now;
*** Census Families;
*** use reported census family income;
*** CAUTION INCONSISTENT WITH OFFICIAL POVERTY;
*** COnsistent with census family income and number of people;
*** and number of people;
gen cfpovguide_amt = fpl1;
replace cfpovguide_amt = fpl1 + fpladdl*(cf_numpers-1) if cf_numpers>1;
gen cfpovguide_ratio = cf_inc_tot/cfpovguide_amt;
** end of comment;
*/;

*** MB END;

drop fpl1 fpladdl;

***************************************************;
*Deflate using CPI-U                              *;
** (was cpi-U X1, same from 82 on) marches HH     *;
***************************************************;
gen incyear = year-1;
sort incyear;
merge m:1 incyear using cpi;
*Z merge m:1 incyear using /data/brook/cycles/march/cpi;
tab _merge;
*drops years from CPI data not in the CPS sample*;
keep if _merge==3; 
drop _merge;
*** to do put back cf_inc*;
su h_inc_tot if year==2009;
*** put in real 2009 $;
*** CPI for 2009 is 214.537;
*** do other values too;
foreach n of varlist p_inc_* h_inc* bf_inc* sf_inc* f_inc* h_fsval h_enrgyva {;
  qui gen r_`n' = (`n' * 214.537)/(cpi_u);
};

***************************************************;
*Education variables/Weight                       *;
***************************************************;
gen lths = (p_educ<=11) if year<1992;
	replace lths = 1 if p_educ==12 & p_gradecom==2 & year<1992;
gen hsgrad = (p_educ==12 & p_gradecom==1) if year<1992;
gen somecol = (p_educ>12 & p_educ<=15) if year<1992;
	replace somecol = 1 if (p_educ==16 & p_gradecom==2) & year<1992;
gen colgrad = (p_educ==16 & p_gradecom==1) | (p_educ>16 & p_educ~=.) if year<1992;

replace lths = (p_educ<39) if year>=1992;
replace hsgrad = p_educ==39 if year>=1992;
replace somecol = (p_educ>=40 & p_educ<=42) if year>=1992;
replace colgrad = (p_educ>=43 & p_educ~=.) if year>=1992;

gen edcat=1 if lths==1;
	replace edcat=2 if hsgrad==1;
	replace edcat=3 if somecol==1;
	replace edcat=4 if colgrad==1;
tab edcat, missing;

*DC adds on 11/26/2014;

*** Degree indicators. The p_educ cutoffs switch in 1992 (14 and 16 before, 42 and 43 after);
*** Missing p_educ is excluded explicitly because Stata sorts missing above every number;
*** so an unguarded >= test would code a missing education as holding a degree;
*** (colgrad above uses the same guard);
gen degree_2yr= (p_educ>=14 & p_educ<.) if year<1992;
replace degree_2yr= (p_educ>=42 & p_educ<.) if year>=1992;
gen degree_4yr= (p_educ>=16 & p_educ<.) if year<1992;
replace degree_4yr= (p_educ>=43 & p_educ<.) if year>=1992;

*** Enrollment from p_ftpt_st: codes 1 or 2 count as enrolled, code 1 alone as enrolled_ft;
gen enrolled= (p_ftpt_st==1 | p_ftpt_st==2);
tab2 p_attend enrolled;
gen enrolled_ft= p_ftpt_st==1;

*** Defined only for persons at or above the high school cutoff (12 before 1992, 39 after);
*** left missing for everyone else, including persons with missing p_educ;
gen hsgrad_enrolled = (p_educ>=12 & enrolled==1) if p_educ>=12 & p_educ<. & year<1992; 
replace hsgrad_enrolled = (p_educ>=39 & enrolled==1) if p_educ>=39 & p_educ<. & year>=1992; 

*DC ends;
	
gen head_edcat = edcat if head==1;
replace head_edcat = 0 if head~=1;
bysort hhid: egen h_edcat = max(head_edcat);
gen head_wgt = p_marwt if head==1;
replace head_wgt = 0 if head~=1;
bysort hhid: egen h_wgt = max(head_wgt);

*** MB;
*** check this;
su h_wgt h_edcat;
su h_wgt h_edcat if head==1;
tab year if h_wgt==0 & h_edcat==0;
*** MB end;

drop head_edcat head_wgt;


*** Small family;
*** Carry the small family head's education category and weight (p_marwt) to all members;
*** Non-heads contribute 0 so the within-family max picks up the head's value;
gen sfhead_edcat = edcat if sfhead==1;
replace sfhead_edcat = 0 if sfhead~=1;
gen sfhead_wgt = p_marwt if sfhead==1;
replace sfhead_wgt = 0 if sfhead~=1;
bysort sfamid: egen sf_edcat = max(sfhead_edcat);
bysort sfamid: egen sf_wgt = max(sfhead_wgt);

*** check this;
*** second summary is restricted to small family heads, the unit these variables describe;
su sf_wgt sf_edcat;
su sf_wgt sf_edcat if sfhead==1;
tab year if sf_wgt==0 & sf_edcat==0;

drop sfhead_edcat sfhead_wgt;

*** Big family;
*** Same construction at the big family level;
gen bfhead_edcat = edcat if bfhead==1;
replace bfhead_edcat = 0 if bfhead~=1;
gen bfhead_wgt = p_marwt if bfhead==1;
replace bfhead_wgt = 0 if bfhead~=1;
bysort bfamid: egen bf_edcat = max(bfhead_edcat);
bysort bfamid: egen bf_wgt = max(bfhead_wgt);

drop bfhead_edcat bfhead_wgt;

*** check this;
*** second summary is restricted to big family heads, the unit these variables describe;
su bf_wgt bf_edcat;
su bf_wgt bf_edcat if bfhead==1;
tab year if bf_wgt==0 & bf_edcat==0;

/*;
comment out for now;
*** CPS family;
*** use reported census family income;
*** CAUTION INCONSISTENT WITH OFFICIAL POVERTY;
*** COnsistent with census family income and number of people;
*** and number of people;
gen cf_edcat = bf_edcat if year>=1989 & f_type!=3;
replace cf_edcat = sf_edcat if year>=1989 & f_type==3;

gen cf_wgt = bf_wgt if year>=1989 & f_type!=2;
replace cf_wgt = sf_wgt if year>=1989 & f_type==3;

*** check this;
su cf_wgt cf_edcat;
su cf_wgt cf_edcat if head==1;
tab year if cf_wgt==0 & cf_edcat==0;
** end of comment;
*/;

***************************************************;
*Race/ethnicity/Age/Marital Status/Sex Variables  *;
***************************************************;
*** change to y/n post 02;
gen hisp = p_ethnicity~=8 & p_ethnicity>0 & year<=2002;
*doesn't know or refused to answer*;
	replace hisp=. if (p_ethnicity==9 | p_ethnicity==10) & year<=2002; 
	replace hisp = 0 if year>2002;
	replace hisp = 1 if year>2002 & p_ethnicity==1;
tab hisp, missing;
gen hispdkrf = hisp==.;
replace hisp=0 if hispdkrf==1;

* check;
tab hisp, missing;
*** White non-Hispanic;
gen white = p_race==1 & hisp==0 & hispdkrf==0;
gen blackhisp = p_race==2 | hisp==1 if year<=2002;
	replace blackhisp = (hisp>0 | p_race==2 | p_race==6 | p_race==10 | p_race==11 | p_race==12 | p_race==15 | p_race==16 | p_race==19) if year>2002;
tab blackhisp, missing;

* MB;
gen black = p_race==2 & hisp==0 if year<=2002;
	replace black = (hisp==0 & (p_race==2 | p_race==6 | p_race==10 | p_race==11 | p_race==12 | p_race==15 | p_race==16 | p_race==19)) if year>2002;
tab black, missing;

gen other = black==0 & hisp==0 & white==0 & hispdkrf==0;

su other black hisp white hispdkrf;

** check;
gen t = white + black + hisp + other + hispdkrf;
tab t, missing;
drop t;
* MB end;

gen p_male = p_sex==1;
tab p_male p_sex;


foreach x in white blackhisp black hisp hispdkrf other p_age p_marst p_male {;
gen head_`x' = `x' if head==1;
replace head_`x' = 0 if head~=1;
bysort hhid: egen h_`x' = max(head_`x');
};

rename h_p_age h_age;
rename h_p_marst h_marst;
rename h_p_male h_male;
for any white blackhisp hisp hispdkrf black other p_age p_marst p_male: drop head_X;

*** check;
for any edcat white blackhisp black hisp hispdkrf other : tab h_X X if p_relhd==1 | p_relhd==2;
*EK switched from tab p_X h_X to bysort h_X: tab p_X, because previously there was an error message
that there were too many values for tab;
for any age marst male: bysort h_X: tab p_X if p_relhd==1 | p_relhd==2;

*** Small family;
*** Attach the small family head's characteristics to every member of the small family;
*** Grouping is by sfamid (as for sf_edcat and sf_wgt) and not by hhid;
*** With hhid a household holding several small families would give every member;
*** the household-wide maximum over all small family heads;
foreach x in white blackhisp hisp hispdkrf black other p_age p_marst p_male {;
gen sfhead_`x' = `x' if sfhead==1;
replace sfhead_`x' = 0 if sfhead~=1;
bysort sfamid: egen sf_`x' = max(sfhead_`x');
*** helper no longer needed once the family-level variable exists;
drop sfhead_`x';
};
rename sf_p_age sf_age;
rename sf_p_marst sf_marst;
rename sf_p_male sf_male;

*** Big family;
*** Attach the big family head's characteristics to every member of the big family;
*** Grouping is by bfamid (as for bf_edcat and bf_wgt) and not by hhid;
*** With hhid a household holding several big families would give every member;
*** the household-wide maximum over all big family heads;
foreach x in white blackhisp hisp hispdkrf black other p_age p_marst p_male {;
gen bfhead_`x' = `x' if bfhead==1;
replace bfhead_`x' = 0 if bfhead~=1;
bysort bfamid: egen bf_`x' = max(bfhead_`x');
*** helper no longer needed once the family-level variable exists;
drop bfhead_`x';
};
rename bf_p_age bf_age;
rename bf_p_marst bf_marst;
rename bf_p_male bf_male;

/*;
*** comment out for now;
*** CPS family;
*** use reported census family income;
*** CAUTION INCONSISTENT WITH OFFICIAL POVERTY;
*** COnsistent with census family income and number of people;
*** and number of people;
for any white blackhisp hisp hispdkrf black age marst male: gen cf_X = bf_X if year>=1989 & f_type!=3;
for any white blackhisp hisp hispdkrf black age marst male: replace cf_X = sf_X if year>=1989 & f_type==3;
*** comment out for now;
*/;

***************************************************;
* Health Insurance Variables                      *;
***************************************************;
*** going to try to get a clean household measure;
*** that's all for now;
*** Variables change here;
*** A) Asked consistently and recoded by Census 89-12;
*** for HH;
**** was there anyone in HH with medicare? Medicaid? champ, any HI (MB THINKS ITS ANY PRIVATE HI=hhiyn)?;
*** NB the comment above must end with the delimiter or it absorbs the tab command below;
for any mcare mcaid champ hhiyn: tab year h_X ;

*** see if match across groups all with care/caid/champ should have hhiyn==1;
*** al with hhiyn =2 should have others 0;
for any mcare mcaid champ: tab h_X h_hhiyn, missing;

*** For people;
*** For all covered by group health, covered by any private;
*** for children under 15, coverd by private HI (within HH, out of hh), covered by  medicare/medicaid;
for any cov_gh cov_hi ch_mc ch_hi chip: tab year p_X ;


*** also a care/caid/champ/cov_gh/hiemp/hi_yn variables;
*** See if these are consistent with person stuff;
egen t=max(p_care==1), by(hhid);
tab t h_mcare;
su t h_mcare;
drop t;

egen t=max(p_caid==1), by(hhid);
tab t h_mcaid;
su t h_mcaid;
drop t;

egen t=max(p_champ==1), by(hhid);
tab t h_champ;
su t h_champ;
drop t;

*** coverage at all;
egen t=max(p_cov_hi==1), by(hhid);
tab t h_hhiyn;
su t h_hhiyn;
drop t;

*** coverage group;
egen t=max(p_cov_gh==1), by(hhid);
** can't quite tab anything HH with employer;
tab h_hhiyn t;
drop t;


**** Check that kid variables match with adult;
*** kid under 15 on care/caid;
egen t2 = max(p_age<15), by(hhid);
egen t=max(p_ch_mc==1) if t2==1, by(hhid);
tab t h_mcare;
tab t h_mcaid;
tab t h_mcare if h_mcaid!=1;
su t h_mcare h_mcaid if t2==1;
su t h_mcare h_mcaid if t2!=1;
drop t t2;

*** kid under 15 has hi;
egen t2 = max(p_age<15), by(hhid);
egen t=max(p_ch_hi==1) if t2==1, by(hhid);
tab t h_hhiyn;
su t h_hhiyn if t2==1;
su t h_hhiyn if t2!=1;
drop t t2;


*** B) CHIP appears in 2001, for under age 19. Use 18 uyear olds too;
***   Seems not to match p_ch_mc for kids that age;
tab p_chip p_ch_mc;

*** check that chip matches with overall household care/caid HI;
egen t2 = max(p_age<=18), by(hhid);
egen t=max(p_chip==1) if t2==1, by(hhid);
tab t h_mcaid;
tab t h_hhiyn;
su t h_mcaid h_hhiyn if t2==1;
su t h_mcaid h_hhiyn if t2!=1;
drop t t2;

*** only for 15 plus, 80 on, recode;
gen p_medicare = 1 if p_care==1;
*** 0 is kid under 15;
replace p_medicare= 0 if p_care==2 | p_care==0;
tab p_medicare p_care, missing;
**pause on;
***pause;

*** only asked 15 plus, 80 on, recoded to include kids;
*** Add any SCHIP;
gen p_medicaid = 1 if p_caid==1 | p_chip==1;
replace p_medicaid= 0 if p_caid!=1 & p_chip!=1;
tab p_medicaid p_caid, missing;
tab p_medicaid p_chip, missing;
tab p_medicaid p_caid if p_chip!=1, missing;
***pause;


*** 80 on, recoded to include kids;
*** (is also p_hiemp but that's my plan);
*** not sure what makes p_hiemp NIU;
gen p_empgrouphi = 1 if p_cov_gh==1;
replace p_empgrouphi= 0 if p_cov_gh==2;
tab p_empgrouphi p_hiemp, missing;
tab p_empgrouphi p_cov_gh, missing;
***pause;

*** 80 on, recoded to include kids;
gen p_militarhi = 1 if p_champ==1;
replace p_militarhi= 0 if p_champ==2;
tab p_militarhi p_champ, missing;
***pause;


*** see if p_cov_gh p_mcaid p_care are inclusive;
tab p_cov_gh p_caid;
tab p_cov_gh p_care;
tab p_cov_gh p_care if p_caid!=1;

*** see if 3 of these sums to right thing for p60s;
gen anypcov = p_cov_gh==1 | p_caid==1 |p_care==1;
gen anypcov2 = p_cov_gh==1 | p_caid==1 |p_care==1 | p_cov_hi==1;
table year, c(sum p_marwt) format(%12.0f);
table year if anypcov==1, c(sum p_marwt) format(%12.0f);
table year if anypcov2==1, c(sum p_marwt) format(%12.0f);

drop anypcov anypcov2;

*** no othhi, use any HI versus hi emp;

*** our variables;
*** public will incluce military and CHIP for <=19;
*** medicaid here includes chip;
gen p_publichi = p_medicaid==1 | p_medicare ==1 |p_militarhi==1 if p_medicaid<. & p_medicare<. & p_militarhi<.;
label variable p_publichi "Medicaid/Medicare/Military/CHIP after 2001";

*** use cov_hi y/n for individuals in place of other hi;
gen p_anyhi1 = p_publichi==1 | p_empgrouphi==1 | p_cov_hi==1  if p_publichi<. & p_empgrouphi<. & p_cov_hi<.;
*pause;
label variable p_anyhi1 "Medicaid/Medicare/Military/Employer provided Group/Other HI/CHIP 01 on, missing 82-83";

gen p_nohi1 = 1-p_anyhi1;
label variable p_nohi1 "No Medicaid/Medicare/Military/Employer provided Group/Other HI/CHIP 01 on, missing 82-83";

gen p_anyhi2 = p_publichi==1 | p_empgrouphi==1 if p_publichi<. & p_empgrouphi<.;
label variable p_anyhi2 "Medicaid/Medicare/Military/Employer provided Group/CHIP 01 on";

gen p_nohi2 = 1-p_anyhi2;
label variable p_nohi2 "No Medicaid/Medicare/Military/Employer provided Group/CHIP 01 on excludes individual/retiree";

for any 1 2: tab p_anyhiX p_nohiX;

for any public medicaid medicare empgrouphi militarhi anyhi1 anyhi2: tab year p_X;

tab p_medicaid p_medicare if p_militarhi==1, su(p_publichi);

tab p_medicaid p_medicare if p_militarhi==0, su(p_publichi);


*** variables for kids;
*** stick to under 18 even though medicaid can be for 18 year olds;
*** includes CHIP;
gen p_kidcaid = p_medicaid  * (p_age<18);
tab p_medicaid p_kidcaid if p_age<18;
tab p_medicaid p_kidcaid if p_age>=18;

*** includes chip;
gen p_kidpub = p_publichi  * (p_age<18);
tab p_publichi p_kidpub if p_age<18;
tab p_publichi p_kidpub if p_age>=18;


gen p_kidanyhi1 = p_anyhi1  * (p_age<18);
tab p_anyhi1 p_kidanyhi1 if p_age<18;
tab p_anyhi1 p_kidanyhi1 if p_age>=18;

gen p_kidanyhi2 = p_anyhi2  * (p_age<18);
tab p_anyhi2 p_kidanyhi2 if p_age<18;
tab p_anyhi2 p_kidanyhi2 if p_age>=18;

gen p_kidnohi1 = p_nohi1  * (p_age<18);
tab p_nohi1 p_kidnohi1 if p_age<18;
tab p_nohi1 p_kidnohi1 if p_age>=18;

gen p_kidnohi2 = p_nohi2  * (p_age<18);
tab p_nohi2 p_kidnohi2 if p_age<18;
tab p_nohi2 p_kidnohi2 if p_age>=18;

drop p_caid;
drop p_hiemp p_cov_gh;
drop p_champ;


*****************************************;
*** Measures for anyone in HH/Families***;
*****************************************;

**** Household: 1 if any member of the household has the person-level flag;
foreach m in medicaid publichi empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 {;
by hhid, sort: egen h_any`m' = max(p_`m');
};

*** compare with the canned household medicaid variable;
*** ours includes chip so it may flag more households;
tab h_anymedicaid h_mcaid;

*** think hhiyn is private;
*** for each any-coverage flag: against hhiyn, then medicaid where hhiyn is not 1,;
*** then medicare where neither hhiyn nor medicaid is 1;
foreach v in anyhi1 anyhi2 {;
tab h_any`v' h_hhiyn;
tab h_any`v' h_mcaid if h_hhiyn!=1;
tab h_any`v' h_mcare if h_hhiyn!=1 & h_mcaid!=1;
};

foreach v in nohi1 nohi2 {;
tab h_any`v' h_hhiyn;
};

*pause;


**** Small family (sfamid, prefix sf_) and then big family (bfamid, prefix bf_);
*** same any-member maximum as the household version, all small family variables are created first;
foreach lvl in s b {;
foreach m in medicaid publichi empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 {;
by `lvl'famid, sort: egen `lvl'f_any`m' = max(p_`m');
};
};

/*; 
** comment out for now;

*** Census family;
*** CPS family;
*** Census Families;
*** use reported census family income;
*** CAUTION INCONSISTENT WITH OFFICIAL POVERTY;
*** COnsistent with census family income and number of people;
*** and number of people;
for any medicaid publichi  empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 : gen cf_anyX = bf_anyX if year>=1989 & f_type!=3;
for any medicaid publichi empgrouphi anyhi1 anyhi2 nohi1 nohi2 kidcaid kidpub kidanyhi1 kidanyhi2 kidnohi1 kidnohi2 : replace cf_anyX = sf_anyX if year>=1989 & f_type==3;

** comment out;
*/;

*pause on;
*pause;


*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
****************************************************;
*** Measures for type of immigrant in HH/Families***;
****************************************************;

*** person-level citizenship flags from p_prcitshp;
*** codes 1 to 3 are treated as citizen by birth, 4 as naturalized, 5 as noncitizen;
gen byte p_borncit = inrange(p_prcitshp, 1, 3);
gen byte p_natcit = (p_prcitshp==4);
gen byte p_noncit = (p_prcitshp==5);

*** citizenship starts in 1994, so blank out earlier survey years;
foreach c in borncit natcit noncit {;
replace p_`c' = . if year<=1993;
};
gen byte p_notborncit = 1 - p_borncit;

*** the three flags should sum to one wherever they are defined;
gen t = p_borncit + p_natcit + p_noncit;
tab t;
drop t;
assert p_borncit + p_notborncit == 1 if !missing(p_borncit) | !missing(p_notborncit);

*** have learned that existing (here) marchcps do files for 2002 and 1996;
*** have wrong labels for peinsuyr;
*** 1996 code of 14 should be 1994-1996, and 13 1992-1993;
*** 2002 code of 16 should be 1998-1999, and 17 2000-2002;
*** We think that 2009 code of 20 should be 2006-2009 not 2008 as it says;

*** came to the US in 1998 or later: peinusyr code 16 and above;
*** left missing before 1994 and for survey years 1998 and 1999;
*** NOTE(review): a missing p_peinusyr also satisfies the >= test in Stata - confirm that is intended;
gen p_camege98 = 0 if year>=1994 & !inlist(year, 1998, 1999);
replace p_camege98 = 1 if p_peinusyr>=16 & year>=1994 & !missing(p_camege98);
tab year p_camege98, missing;

*** came to the US in 1996 or later: peinusyr code 15 and above, missing before 1994;
gen p_camege96 = 0 if year>=1994;
replace p_camege96 = 1 if p_peinusyr>=15 & year>=1994 & !missing(p_camege96);
tab year p_camege96, missing;

*** cross-checks between the two flags and against born citizens;
tab p_camege98 p_camege96;
foreach v in p_camege98 p_camege96 {;
tab `v' p_borncit;
};

*** indicator for having come to the US within roughly the last 5 years;
*** (came LE 5 years ago: it is 1 for the most recent arrival codes, not for 5 or more years in the US);
*** can't do exactly;
*** use last 3 codes of peinusyr;
*** last code is range of 2 years 3 months (even years) and 3 years 3 months (odd years);
*** for even years max would be 6 years 3 months if they came in 1/1 of first year;
*** for even years min would be 4 years 4 months if they came in 12/31 of second year;
*** for odd years max would be 7 years 3 months if they came in 1/1 of first year;
*** for odd years min would be 5 years 4 months if they came in 12/31 of second year;
*** NOTE THIS IS ASKED OF US BORN ABROAD OF AMERICAN PARENT/BORN in PUERTO RICO;

*** FIX: the base value used to be assigned with year>=94, which holds for every 4-digit survey year,;
*** so pre-1994 rows were set to 0 instead of missing. Use 1994, as p_camege96 and p_camege98 do;
gen p_camele5years = 0 if year>=1994;

*** cutoff code rises by one every two survey years;
*** NOTE(review): no cutoff is assigned for survey years after 2010, so the flag stays 0 there;
*** confirm the peinusyr codes for later years and extend this list;
replace p_camele5years = 1 if p_peinusyr >=19 & year==2010;
replace p_camele5years = 1 if p_peinusyr >=18 & (year==2009 | year==2008);
replace p_camele5years = 1 if p_peinusyr >=17 & (year==2007 | year==2006);
replace p_camele5years = 1 if p_peinusyr >=16 & (year==2005 | year==2004);
replace p_camele5years = 1 if p_peinusyr >=15 & (year==2003 | year==2002);
replace p_camele5years = 1 if p_peinusyr >=14 & (year==2001 | year==2000);
replace p_camele5years = 1 if p_peinusyr >=13 & (year==1999 | year==1998);
replace p_camele5years = 1 if p_peinusyr >=12 & (year==1997 | year==1996);
replace p_camele5years = 1 if p_peinusyr >=11 & (year==1995 | year==1994);

tab p_camele5years p_borncit;


*** recode all the new time since came to the US variables to be 0 if a borncit;
*** decision 6/2011;
foreach v in p_camele5years p_camege96 p_camege98 {;
replace `v' = 0 if `v'==1 & p_borncit==1;
};

*** after the recode no born citizen should be flagged;
tab p_camele5years p_borncit;

*** kid version of each immigrant measure: flag is 1 and person is under 18;
*** missing wherever the person-level flag is missing;
foreach c in borncit notborncit natcit noncit camele5years camege96 camege98 {;
gen kid`c' = (p_`c'==1 & p_age<18) if !missing(p_`c');
};
assert kidborncit==p_borncit if p_age<18;


**** Household: any member and any kid with the measure;
foreach c in borncit natcit noncit notborncit camele5years camege96 camege98 {;
by hhid, sort: egen h_any`c' = max(p_`c');
by hhid, sort: egen h_anykid`c' = max(kid`c');
};

**** Small family;
foreach c in borncit natcit noncit notborncit camele5years camege96 camege98 {;
by sfamid, sort: egen sf_any`c' = max(p_`c');
by sfamid, sort: egen sf_anykid`c' = max(kid`c');
};

**** Big family;
foreach c in borncit natcit noncit notborncit camele5years camege96 camege98 {;
by bfamid, sort: egen bf_any`c' = max(p_`c');
by bfamid, sort: egen bf_anykid`c' = max(kid`c');
};


*** person-level kid helpers were only needed for the aggregates;
drop kidborncit kidnotborncit kidnatcit kidnoncit kidcamele5years kidcamege96 kidcamege98;
**********************************;
** Citizenship of Householder**;
**********************************;
*** New variables, to do : move to march program;
*** Householder citizenship: flag the head row, then spread it to the whole household-year;
*** born citizen is done first so its helper can be checked before it is dropped;
gen tmp1 = (head==1 & p_borncit==1);
egen h_head_borncit = max(tmp1), by(hhid year);
tab h_head_borncit p_borncit if head==1;
tab tmp1 h_head_borncit if head==1;
drop tmp1;

*** same construction for the remaining citizenship groups;
foreach c in notborncit natcit noncit {;
gen tmp1 = (head==1 & p_`c'==1);
egen h_head_`c' = max(tmp1), by(hhid year);
drop tmp1;
};

*** head arrived recently: helper is defined on head rows only;
gen tmphead = (p_camele5years==1) if head==1;
egen h_head_camele5years = max(tmphead), by(year hhid);
tab tmphead h_head_camele5years;

*** head did not arrive recently: reuse the helper with the complement;
replace tmphead = 1 - p_camele5years if head==1;
egen h_head_notcamele5years = max(tmphead), by(year hhid);
tab tmphead h_head_notcamele5years;
tab h_head_notcamele5years h_head_camele5years;
drop tmphead;


/*; 
** comment out for now;

*** Census family;
*** CPS family;
*** Census Families;
*** use reported census family income;
*** CAUTION INCONSISTENT WITH OFFICIAL POVERTY;
*** COnsistent with census family income and number of people;
*** and number of people;

for any borncit natcit noncit notborncit camele5years camege96 camege98: gen cf_anyX = bf_anyX if year>=1989 & f_type!=3;
for any borncit natcit noncit notborncit camele5years camege96 camege98: replace cf_anyX = sf_anyX if year>=1989 & f_type==3;

** comment out;
*/;

*pause on;
*pause;
*EK ends;



******************************************************************************;
* Food Stamps, School Lunch, LIHEAP, Tenure, Public Housing, Rental subsidy  *;
******************************************************************************;
*** Only household concepts;

*** Tenure dummies, the left out category is tenure==3 (no cash rent);
gen h_rents = (h_tenure==2) if !missing(h_tenure);
gen h_owns = (h_tenure==1) if !missing(h_tenure);

foreach v in rents owns {;
tab h_`v' h_tenure;
};
label variable h_rents "HH rents for cash";
label variable h_owns "HH owned or being bought";

*** public housing and rental subsidy: look at the raw coding by tenure, then turn code 2 into 0;
*** most of niu for these two is owned;
foreach v in public rentsub {;
tab h_tenure h_`v';
replace h_`v' = 0 if h_`v'==2;
};

** tenure is not needed after this point;
drop h_tenure;


*** food stamps: code 2 becomes 0;
replace h_foodstp = 0 if h_foodstp==2;
tab year h_foodstp, missing;

tab h_foodstp, su(h_fsnum);

su h_foodstp h_fsnum r_h_fsval;

*** school lunch: recode both flags first, then tabulate both by year;
foreach v in hhotlun freelun {;
replace h_`v' = 0 if h_`v'==2;
};
foreach v in hhotlun freelun {;
tab year h_`v', missing;
};
su h_hhotlun h_freelun h_numfree;

** energy assistance: weird small number of missings, leave as 0;
replace h_enrgyas = 0 if h_enrgyas==2;
tab year h_enrgyas, missing;

su h_enrgyas r_h_enrgyva;

****************************************;
** Self-rated health only 96 on        *;
****************************************;
*** Self-rated health dummies, defined only for positive nonmissing p_srhealth;
*** codes 4 and 5 form the fair or poor group, codes 1 and 2 the top group;
gen p_hfairpoor = inlist(p_srhealth, 4, 5) if p_srhealth>0 & !missing(p_srhealth);
gen p_hexcgood = inlist(p_srhealth, 1, 2) if p_srhealth>0 & !missing(p_srhealth);

foreach v in p_hfairpoor p_hexcgood {;
tab p_srhealth `v';
};
label variable p_hexcgood "General health is excellent or good";
label variable p_hfairpoor "General health is fair or poor";

*** household has any member in each health group;
egen h_hfairpoor = max(p_hfairpoor), by(hhid);
egen h_hexcgood = max(p_hexcgood), by(hhid);

*****************************;
** WIC from 01 on           *;
*****************************;
*** code 2 becomes 0;
replace h_wicyn = 0 if h_wicyn==2;



*EK adds living arrangement variables;
****************************************;
** Living Arrangements		       	   *;
****************************************;
* Look at the raw inputs used to build the living arrangement variables;
foreach v in p_ftpt p_ftpt_st p_famrel p_relhd {;
tab `v', missing;
};
* f_type and p_famtyp agree in all observations, except 5;
tab f_type p_famtyp, missing;

* Number of people in household and in big family already exist. They are: h_numpers and bf_numpers ;

* Number of families in each household: count small family heads whose f_type is 1, 3 or 4;
gen family = inlist(f_type, 1, 3, 4) & sfhead==1;
by hhid, sort: egen h_numfams = total(family);
tab h_numfams, missing;

* Number of small family heads in each household;
by hhid, sort: egen h_numsfheads = total(sfhead);
tab h_numsfheads, missing;

* Extra adults: num of adults other than the householder or spouse of the householder;
* EK initially also excluded p_relhd=13 (either partner or roommate), HH suggested to drop this exclusion;
gen othadult = p_age>=18 & head==0 & !inlist(p_relhd, 3, 4);
by hhid, sort: egen h_numothadults = total(othadult);

* Presence of young adult (18-30) not in college and not working full time;
* HH asks for two variables, one without and one with the school enrol var (present from 1986 onwards);
* the not-in-labor-force reason variable changes name in 1994, so each version picks it by survey year;

* Version 1: no school enrollment condition;
gen yadult = inrange(p_age, 18, 30) & p_ftpt!=2
    & ((p_rea_nlf!=1 & year<1994) | (p_rea_nlf_sc!=1 & year>=1994));
by hhid, sort: egen h_numyadults = total(yadult);
gen h_anyyadults1 = (h_numyadults>0);
tab h_numyadults h_anyyadults1, missing;
drop yadult h_numyadults;

* Version 2: additionally requires p_attend!=1;
gen yadult = inrange(p_age, 18, 30) & p_ftpt!=2 & p_attend!=1
    & ((p_rea_nlf!=1 & year<1994) | (p_rea_nlf_sc!=1 & year>=1994));
by hhid, sort: egen h_numyadults = total(yadult);
gen h_anyyadults2 = (h_numyadults>0);
tab h_numyadults h_anyyadults2, missing;
drop yadult h_numyadults;

* Young adult sample (ages 18 to 30), used to build living situations for young adults only;
gen yadult = inrange(p_age, 18, 30);

* Young adult living alone or with other unrelated individuals: household has no family;
tab h_numfams yadult, missing;
gen yadult_alone = (h_numfams==0 & yadult==1);
tab h_numfams yadult_alone, missing;

* Is the head of the household a young adult?;
gen headyadult = (head==1 & yadult==1);
by hhid, sort: egen h_headyadult = total(headyadult);
tab h_numfams headyadult, missing;
tab f_type h_headyadult, missing;

* Is the head of the small family a young adult?;
gen sfheadyadult = (sfhead==1 & yadult==1);
by sfamid, sort: egen sf_headyadult = total(sfheadyadult);
tab h_numfams sfheadyadult, missing;
tab f_type sf_headyadult, missing;

* Young adult living with parents, built up in steps with a check after each step;
* step 1: the young adult is child of the sf head;
gen yadult_par = (yadult==1 & p_famrel==3);
foreach v in h_numfams f_type {;
tab `v' yadult_par, missing;
};

* step 2: the young adult is child of the hh head;
replace yadult_par = 1 if yadult==1 & p_relhd==5;
foreach v in h_numfams f_type {;
tab `v' yadult_par, missing;
};

* step 3: the young adult is foster child of the hh head (foster kids seem to be secondary indivs);
replace yadult_par = 1 if yadult==1 & p_relhd==11;
foreach v in h_numfams f_type {;
tab `v' yadult_par, missing;
};

* step 4: young head whose parents are present in the hh;
gen temp1 = (p_relhd==8 & h_headyadult==1);
by hhid, sort: egen temp2 = total(temp1);
replace yadult_par = 1 if temp2>0 & headyadult==1;
foreach v in h_numfams f_type {;
tab `v' yadult_par, missing;
};
drop temp1 temp2;
* EK initially also included step children, but that variable was discontinued in 1993;

* Young adult living with other relatives;
* either a relative of the sf head, or a sibling or other relative of the household head;
gen yadult_rel = (yadult==1 & p_famrel==4);
replace yadult_rel = 1 if yadult==1 & inlist(p_relhd, 7, 9, 10);
foreach v in h_numfams f_type {;
tab `v' yadult_rel, missing;
};

* young sf head with relatives present in the sf;
gen temp1 = (p_famrel==4 & sf_headyadult==1);
by sfamid, sort: egen temp2 = total(temp1);
tab f_type temp2, missing;
replace yadult_rel = 1 if temp2>0 & sfheadyadult==1;
foreach v in h_numfams f_type {;
tab `v' yadult_rel, missing;
};

* Young adult living with others: a young adult in none of the three groups above;
gen yadult_oth = yadult==1 & yadult_rel==0 & yadult_par==0 & yadult_alone==0;

* Set the young adult variables to missing for everyone else;
* that way state level collapses average over the young adult sample only;
foreach v in yadult_alone yadult_par yadult_rel yadult_oth {;
replace `v' = . if yadult==0;
};

* the four flags are summed and tabulated against the sample flag as a check;
gen temp3 = yadult_rel + yadult_par + yadult_alone + yadult_oth;
tab yadult temp3, missing;

drop temp1 temp2 temp3 family *eadyadult;

* Variable labels for the living arrangement measures;
label variable h_numfams "Number of families in HH";
label variable h_numsfheads "Number of small family heads in HH";
label variable h_numothadults "Number of other adults in HH";
label variable h_anyyadults1 "Presence of young adults not in school and not working FT in HH, Ver 1";
label variable h_anyyadults2 "Presence of young adults not in school and not working FT in HH, Ver 2";
label variable yadult "Young adult (18-30)";
label variable yadult_alone "Young adult living alone or with unrelated individuals";
label variable yadult_par "Young adult living with parents";
label variable yadult_rel "Young adult living with other relatives";
label variable yadult_oth "Young adult living with other people";

*EK ends here;

*DC adds living arrangement variables from micro.do, lines 93-509;

*use micro , clear;

*** calyear is the survey year minus one, rebuilt from scratch if it already exists;
capture drop calyear;
gen calyear = year - 1;


*********************************************************;
***  understand better the young adult stuff          ***;
***  checks only, no variables are created below      ***;
*********************************************************;

*** in 89 on period, know parentline if they are in house;
*** see how good a job we are doing;
foreach g in alone par rel oth {;
tab p_parentline yadult_`g' if inrange(p_age, 18, 30);
};

*** for alone, makes sense, almost none of those alone have any parent in HH;
tab p_parentline p_famrel if yadult_alone==1 & p_parentline!=0;
*** ftype==5;
tab p_parentline f_type if yadult_alone==1 & p_parentline!=0, nolab;

*** for par a small number of parents lines defined, but not yadult_par==1;
tab p_parentline p_famrel if yadult_par==0 & p_parentline!=0;

*** for rel, most of parent line is 2 relationship;
tab p_parentline p_famrel if yadult_rel==1 & p_parentline!=0;

** marital status for the other and relative groups;
foreach g in oth rel {;
tab p_marst yadult_`g' if yadult==1;
};

*** try to see who others are;
*** evenly split between heads and others;
tab head yadult_oth;

*** within the others group: family relationship (many heads, many wives),;
*** marital status (many married) and head status (many heads);
foreach v in p_famrel p_marst head {;
tab `v' if yadult_oth==1;
};

*DC changes from year>=1989 to year>=1988;
** many wives and ref. person with relatives;
tab p_relhd if yadult_oth==1 & h_numfams==1 & year>=1988;
*DC ends;
*DC ends;

*****************************************************
*** Now try to define consistent value MB suggests **
*****************************************************

*** try to think through how to define;
*** young adult living alone, no other families;
*** alternative would say, I am a young adult, no other families I am related too;
*** see how different that is;
*** this way might include POSSLQs;

*** start every myadult flag at 0 for young adults and missing for everyone else;
*** NOTE(review): alonall looks like a typo for aloneall. It creates a stray all-zero variable;
*** myadult_alonall, while myadult_aloneall is generated separately further down - confirm and clean up;
foreach g in alonall alonenokid alonewkid alone marriednokid marriedwkid married par rel {;
gen myadult_`g' = 0 if yadult==1;
};

***************************;
*** Alone, no kids      ***;
***************************;
*** head, no relatives;
*** relationship codes differ by period: 2 or 7 through 1988, 2 or 14 from 1988 on;
replace myadult_alonenokid = 1 if yadult==1 & inlist(p_relhd, 2, 7) & year<=1988;
*DC changes from year>=1989 to year>=1988;
replace myadult_alonenokid = 1 if yadult==1 & inlist(p_relhd, 2, 14) & year>=1988;
*DC ends;

*** correlation of this and current EK version is 0.79 for CA;
tab myadult_alonenokid yadult_alone;
corr myadult_alonenokid yadult_alone;

*** mine is 1, hers is 0, unrelated individuals who are never heads, or 2s all who are heads;
tab year p_relhd if myadult_alonenokid==1 & yadult_alone==0, nolab;
tab year p_relhd if myadult_alonenokid==1 & yadult_alone==0, su(head) nolab;

*** hers is 1 mine is 0, all 11, 12, 13 for 89 forward;
*** Foster kids probably shouldn't count as living alone so OK to leave those out;
*** Partner roommate likely contains some cohabs but nothing we can do about this;
*** foster kids (11), nonrelatives with relatives starting in 1994 (12) and 13 (roommate/partner);
tab year p_relhd if myadult_alonenokid==0 & yadult_alone==1, nolab;
tab year p_relhd if myadult_alonenokid==0 & yadult_alone==1, su(head) nolab;
**** here 11s/12s are family type 5 (unrelated individuals);
**** possible coding error in 12 f_type or p_relhd;
tab f_type if myadult_alonenokid==0 & yadult_alone==1 & p_relhd==12;
*** all are subfamilies of 1, stick with assumption 12 is error, trusting relationship to head;
tab sf_numpers if myadult_alonenokid==0 & yadult_alone==1 & p_relhd==12;


*** set roommate/partners to be young adults living alone if they have no family;
*** first check they have no family;
tab p_famrel p_relhd if myadult_alonenokid==0 & yadult_alone==1;
tab sf_numpers if myadult_alonenokid==0 & yadult_alone==1;
*DC changes from year>=1989 to year>=1988;
replace myadult_alonenokid = 1 if yadult==1 & p_relhd==13 & year>=1988;
*DC ends;



***************************
*** Alone, w kids	***
***************************

*** now see what gets added if you are allowed to have your own kids;

*** indicator for being child of head;
*** the child code is 4 through 1988 and 5 from 1988 on, so 1988 ends up with the later coding;
gen temp1 = p_relhd==4 if year<=1988;
*DC changes from year>=1989 to year>=1988;
replace temp1 = p_relhd==5 if year>=1988;
*DC ends;
*** number of children of head in HH;
egen numtemp1 = total(temp1), by(hhid);
*** alone with a kid if I'm the head and there is at least one own kid of head and;
*** the number of people in my big family is me plus own kids;
*** and no other relatives who are older;
replace myadult_alonewkid = 1 if yadult==1 & p_relhd==1 & numtemp1>0 & bf_numpers==numtemp1+1;

*** cross-tab alone without kids against alone with kids;
*** FIX: the separator line before this tab had no terminating semicolon, so under the;
*** semicolon delimiter the tab was swallowed into the comment and never ran;
tab myadult_aloneno myadult_alonew;

** check is kids, see;
*** temp3 marks everyone in a big family that contains an alone-with-kid young adult;
egen temp3 = max(myadult_alonewkid), by(bfamid year);
*** see  if most of these people who are related to head are young;
su p_age if temp1==1 & temp3==1 & p_relhd==4 & year<=1988 & myadult_alonewkid!=1;
*DC changes from year>=1989 to year>=1988;
su p_age if temp1==1 & temp3==1 & p_relhd==5 & year>=1988 & myadult_alonewkid!=1;
*** look at those older than 18;
list hhid p_age p_relhd f_type bfamid year p_famrel if temp1==1 & temp3==1 & year<=1988 & p_age>=18;
list hhid p_age p_relhd f_type bfamid year p_famrel if temp1==1 & temp3==1 & year>=1988 & p_age>=18;
*** 82 obs in CA, small;
su hhid p_age p_relhd f_type bfamid year p_famrel if temp1==1 & temp3==1 & year<=1988 & p_age>=18;
** 27 obs in CA, tiny;
su hhid p_age p_relhd f_type bfamid year p_famrel if temp1==1 & temp3==1 & year>=1988 & p_age>=18;
*DC ends;
** mutually exclusive (yeah);
su myadult_aloneno myadult_alonew if myadult_alonewkid==1;
su myadult_aloneno myadult_alonew if myadult_alonewkid==0;
drop temp1 numtemp1 temp3;

** for child of family head, impose that no other relatives;
*** indicator for child of the family head;
*** p_famrel is not still around for <=88, so the indicator stays 0 before 1988;
*** (if it were, the pre-88 rows would be flagged the same way);
*DC changes from year>=1989 to year>=1988;
gen temp2 = cond(year>=1988, p_famrel==3, 0);
*DC ends;
*** number of children of the family head in the small family;
egen numtemp2 = total(temp2), by(sfamid year hhid);
*** alone with a kid if I'm the head of family and there is at least one own kid of head and;
*** the number of people in my small family is me plus own kids;
*** and there is no bigger family;
replace myadult_alonewkid = 1
    if yadult==1 & p_famrel==1 & numtemp2>0 & sf_numpers==numtemp2+1
    & year<=1988 & bf_numpers==sf_numpers;
*DC changes from year>=1989 to year>=1988;
replace myadult_alonewkid = 1
    if yadult==1 & p_famrel==1 & numtemp2>0 & sf_numpers==numtemp2+1
    & year>=1989 & bf_numpers==sf_numpers;
*DC ends;
** check is kids: temp3 marks every household-year with an alone-with-kid young adult;
egen temp3 = max(myadult_alonewkid), by(hhid year);

*** check on obs above age 18;
** can't be any of these;
su hhid p_age p_relhd f_type bfamid year p_famrel
    if temp2==1 & temp3==1 & year<=1988 & p_age>=18 & bf_numpers==sf_numpers;
*** 66 of these in CA (small);
*DC changes from year>=1989 to year>=1988;
su hhid p_age p_relhd f_type bfamid year p_famrel
    if temp2==1 & temp3==1 & year>=1988 & p_age>=18;
*DC ends;
*** example ref person 27, kid is 31, partner roommate;
*** ignore for now;
list hhid p_age p_relhd f_type bfamid year p_famrel temp2 temp3 bf_numpers p_marst
    if hhid==2571762;
** mutually exclusive (yeah);
foreach k in 1 0 {;
su myadult_aloneno myadult_alonew if myadult_alonewkid==`k';
};
drop temp2 numtemp2 temp3;

***********************************
*** Married, alone, no kids	***
***********************************

*** married and alone if married and head or spouse and only 2 people in big family;
*** or unrelated subfamily and only 2 there and i am one;

*** married couple on their own: small family of exactly 2 that is also the whole big family;
*** head married noone else, married af present  (all);
replace myadult_marriednokid = 1
    if yadult==1 & inlist(p_marst, 1, 2) & sf_numpers==2 & p_relhd==1
    & bf_numpers==sf_numpers;
*** married to head noone else in small family, spouse code is 3 through 1988;
replace myadult_marriednokid = 1
    if yadult==1 & inlist(p_marst, 1, 2) & p_relhd==3 & sf_numpers==2
    & year<=1988 & bf_numpers==sf_numpers;
*DC changes from year>=1989 to year>=1988;
*** from 1988 on the spouse can be code 3 or 4;
replace myadult_marriednokid = 1
    if yadult==1 & inlist(p_marst, 1, 2) & inlist(p_relhd, 3, 4) & sf_numpers==2
    & year>=1988 & bf_numpers==sf_numpers;
*DC ends;
*** mutually exclusive (yeah);
foreach k in 1 0 {;
su myadult_aloneno myadult_alonewkid myadult_marriednokid if myadult_marriednokid==`k';
};


***********************************
*** Married, alone, w kids	***
***********************************
*** now see what gets added if you are allowed to have your own kids;

*** indicator for child of head, code 4 through 1988 and code 5 from 1988 on;
gen temp1 = (p_relhd==4) if year<=1988;
*DC changes from year>=1989 to year>=1988;
replace temp1 = (p_relhd==5) if year>=1988;
*DC ends;
*** number of children of head in HH;
egen numtemp1 = total(temp1), by(hhid);

*** married with a kid if I'm the head and there is at least one own kid of head and;
*** the small family is the couple plus own kids, i'm married and bfam is small fam;
replace myadult_marriedwkid = 1
    if yadult==1 & p_relhd==1 & numtemp1>0 & sf_numpers==numtemp1+2
    & inlist(p_marst, 1, 2) & bf_numpers==sf_numpers;
*** same for the spouse of head, spouse code is 3 through 1988;
replace myadult_marriedwkid = 1
    if yadult==1 & p_relhd==3 & numtemp1>0 & sf_numpers==numtemp1+2
    & inlist(p_marst, 1, 2) & year<=1988 & bf_numpers==sf_numpers;
*DC changes from year>=1989 to year>=1988;
*** from 1988 on the spouse can be code 3 or 4;
replace myadult_marriedwkid = 1
    if yadult==1 & inlist(p_relhd, 3, 4) & numtemp1>0 & sf_numpers==numtemp1+2
    & inlist(p_marst, 1, 2) & year>=1988 & bf_numpers==sf_numpers;
*** check age of the children of head who are not flagged themselves;
su p_age if numtemp1>0 & p_relhd==4 & year<=1988 & myadult_marriedwkid!=1;
su p_age if numtemp1>0 & p_relhd==5 & year>=1988 & myadult_marriedwkid!=1;
*DC ends;
** check is kids: temp3 marks every small family-year with a flagged young adult;
egen temp3 = max(myadult_marriedwkid), by(sfamid year);

*** 6488 obs pre 89, all married spouses of head;
su hhid p_age p_relhd f_type bfamid year p_famrel p_marst
    if numtemp1>0 & (p_relhd==3) & temp3==1 & p_age>=18
    & sf_numpers==numtemp1+2 & inlist(p_marst, 1, 2) & year<=1988;
* 0 obs;
su hhid p_age p_relhd f_type bfamid year p_famrel p_marst
    if numtemp1>0 & (p_relhd==3) & temp3==1 & p_age>=18
    & sf_numpers==numtemp1+2 & !inlist(p_marst, 1, 2) & year<=1988;


*DC changes from year>=1989 to year>=1988;
*** 7566 obs post 88, all spouses of family head;
su hhid p_age p_relhd f_type bfamid year p_famrel p_marst
    if numtemp1>0 & inlist(p_relhd, 3, 4) & temp3==1 & p_age>=18
    & sf_numpers==numtemp1+2 & inlist(p_marst, 1, 2) & year>=1988;
** none;
su hhid p_age p_relhd f_type bfamid year p_famrel p_marst
    if numtemp1>0 & inlist(p_relhd, 3, 4) & temp3==1 & p_age>=18
    & sf_numpers==numtemp1+2 & !inlist(p_marst, 1, 2) & year>=1988;
*DC ends;

*** mutually exclusive  (yeah);
foreach k in 1 0 {;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid if myadult_marriedwkid==`k';
};
drop temp1 numtemp1 temp3;

*** indicator for child of family head;
*** p_famrel is not still around for <=88, so the indicator stays 0 before 1988;
*DC changes from year>=1989 to year>=1988;
gen temp2 = cond(year>=1988, p_famrel==3, 0);
*DC ends;
*** number of children of the family head in the small family;
egen numtemp2 = total(temp2), by(sfamid);

*** married with a kid if I am the family head (p_famrel 1) or married to the family head (p_famrel 2),;
*** there is at least one own kid, the small family is the couple plus own kids, I am married,;
*** I am not the household head, and the small family is the whole big family;
*DC changes from year>=1989 to year>=1988;
*** each role is applied for year<=1988 and then for year>=1988;
foreach r in 1 2 {;
replace myadult_marriedwkid = 1
    if yadult==1 & p_famrel==`r' & numtemp2>0 & sf_numpers==numtemp2+2
    & inlist(p_marst, 1, 2) & year<=1988 & head!=1 & bf_numpers==sf_numpers;
replace myadult_marriedwkid = 1
    if yadult==1 & p_famrel==`r' & numtemp2>0 & sf_numpers==numtemp2+2
    & inlist(p_marst, 1, 2) & year>=1988 & head!=1 & bf_numpers==sf_numpers;
};
*DC ends;
** check is kids: temp3 marks every small family-year with a flagged young adult;
egen temp3 = max(myadult_marriedwkid), by(sfamid year);
*** check age;
su p_age if numtemp2>0 & temp3==1 & myadult_marriedwkid!=1;

*DC changes from year>=1989 to year>=1988;
*** look first at family heads and then at family spouses;
*** check those above 18 who qualify but are not flagged, none pre 88 and none post 88;
foreach r in 1 2 {;
su p_famrel numtemp2 sf_numpers p_marst
    if yadult==1 & p_famrel==`r' & temp3!=0 & numtemp2>0 & sf_numpers==numtemp2+2
    & inlist(p_marst, 1, 2) & year<=1988 & head!=1 & myadult_marriedwkid!=1 & p_age>=18;
su p_famrel numtemp2 sf_numpers p_marst
    if yadult==1 & p_famrel==`r' & temp3!=0 & numtemp2>0 & sf_numpers==numtemp2+2
    & inlist(p_marst, 1, 2) & year>=1988 & head!=1 & myadult_marriedwkid!=1 & p_age>=18;
};
*DC ends;

*** mutually exclusive  (yeah);
foreach k in 1 0 {;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid if myadult_marriedwkid==`k';
};
drop temp2 numtemp2 temp3;


********************************************************************
*** Alone married or unmarried w/wout kids suggested new variable **
*** to replace yadult_alone    	      	   	     	 	  **
********************************************************************;

*** sum of the four alone flags (unmarried or married, without or with kids);
*** missing for non young adults because every component is missing there;
gen myadult_aloneall = myadult_alonenokid
    + myadult_alonewkid
    + myadult_marriednokid
    + myadult_marriedwkid;
*** weighted mean by survey year;
tab year [aw=p_marwt], su(myadult_aloneall);


***********************************
*** unmarried with parents WHAT DO W MARRIED & PARENTS?	***
***********************************
**** parent is householder;
*** Young adult living with parents (myadult_par), built in steps and then made exclusive of married w kid;
*** child of householder: relationship code 4 through 1988, code 5 from 1988 on;
replace myadult_par = 1 if yadult==1 & p_relhd==4 & year<=1988;
*DC changes from year>=1989 to year>=1988;
replace myadult_par = 1 if yadult==1 & p_relhd==5 & year>=1988;
***** foster kid, 89 on only;
replace myadult_par = 1 if yadult==1 & p_relhd==11 & year>=1988;
*** family is that kid is head;
*** the pre-88 version of the family-relationship rule is kept commented out below;
/* not in my data;
replace myadult_par = 1 if yadult==1 & p_famrel==3 & year<=1988;
*/
replace myadult_par = 1 if yadult==1 & p_famrel==3 & year>=1988;

*** can't do in pre-89, head yadult and parent in hh;
*** headyadult is 1 for young adult householders from 1988 on and missing otherwise;
gen headyadult=1 if p_relhd==1 & yadult==1 & year>=1988;
*** h_headyadult counts young adult householders per household;
egen h_headyadult = total(headyadult), by(hhid);
tab h_headyadult;

*** temp1 flags relationship code 8 rows in households with exactly one young adult head;
*** (code 8 is the parent of the head according to the inline comment below);
gen temp1 = h_headyadult==1 & p_relhd==8 & year>=1988;
*** do within big family;
bysort bfamid: egen temp2=total(temp1);
replace myadult_par = 1 if headyadult==1 & temp2>0 & year>=1988;     /*are parents of young head present in bigfamily?*/
*DC ends;
*** check age;
*** NOTE(review): temp1 is generated without an if qualifier so it is never missing,;
*** which makes this summarize every observation - confirm the intended subsample;
su p_age if temp1<.;
*** not exactly mutually exclusive with; 
*** myadult_marriedwkid (1 fam in 94 with problems with CA);
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_par==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_par==0;


*** 1 ob in CA;
tab bf_numpers if myadult_par==1 & myadult_marriedwkid==1;
** prioritize p_relhd;
list hhid  p_relhd bf_numpers if myadult_par==1 & myadult_marriedwkid==1;
*** case, hhid=1349345 one of p_relhd==5, but bfamid is different in 1994;
*** hard-coded household id is a debugging example only;
list hhid p_age p_relhd f_type bfamid year p_famrel p_marst sfamid bfamid myadult_marriedwkid myadult_par bf_numpers sf_numpers if hhid==1349345, nolab;
*** look into later, for now, turn off prioritize bfamid;
*** overlap is resolved in favor of married w kid by zeroing myadult_par;
replace myadult_par=0 if myadult_marriedwkid==1 & myadult_par==1;

*** now mutually exclusive;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_par==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_par==0;



*** head of sfam and Yadult;
*** sf_headyadult counts young adult small family heads per small family;
gen sfheadyadult = yadult==1 & sfhead == 1;
bysort sfamid: egen sf_headyadult=total(sfheadyadult);
*** helpers from the parent step are dropped, headyadult and h_headyadult are kept;
drop temp2  temp1;


***********************************;
*** Young adults living with other relatives (myadult_rel);
*** Open question from the original author, what to do with MARRIED young adults living with relatives;
*** Logic copied from ELIRA;
***********************************;
* Young adult living with other relatives;
replace myadult_rel = 1 if yadult==1 & p_famrel==4; /*the young adult is a relative of the sf head*/
*young adult is sibling or other relative of the household head;
*** p_relhd codes differ by period, 5 is used for year<=1988 and 7, 9, 10 for year>=1988;
*** NOTE(review): both conditions apply when year==1988, confirm this overlap is intended;
replace myadult_rel = 1 if yadult==1 & p_relhd==5 & year<=1988; 
*DC changes from year>=1989 to year>=1988;
replace myadult_rel = 1 if yadult==1 & (p_relhd==7 | p_relhd==9 | p_relhd==10) & year>=1988; 

*** temp1 flags relatives (p_famrel==4) in a small family that has a young adult head;
*** temp2 counts those relatives within each sfamid;
gen temp1 = sf_headyadult==1 & p_famrel==4; 
bysort sfamid: egen temp2=total(temp1);
tab f_kind temp2 if year<=1988, missing;
tab f_type temp2 if year>=1988, missing;
*DC ends;
replace myadult_rel = 1 if sfheadyadult==1 & temp2>0 & head!=1;      /*are relatives of young sf head present in sf?*/
*** check age;
*** temp1 is a 0/1 indicator and is never missing, so the old condition temp1<. summarized every record;
*** restrict the age check to the flagged relatives;
su p_age if temp1==1;
*** not mutually exclusive with alonewkid, 3 obs in CA;

su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_rel==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par if myadult_rel==0;
****
*** not mutually exclusive with alonewkid, 3 obs in CA;
tab myadult_alonewkid myadult_rel;
list hhid  if myadult_alonewkid==1 &  myadult_rel==1;
*** example again, big family id suggests not related but the relhd says are, go with bfamid;
list hhid p_age p_relhd f_type bfamid year p_famrel p_marst sfamid bfamid myadult_alonewkid myadult_rel bf_numpers sf_numpers if hhid==1319659, nolab;
** go with bfamid;
*** alone with kid wins over living with relatives;
replace myadult_rel=0 if myadult_alonewkid==1;

*** not mutually exclusive with married wkid, 6 obs in ca;
list hhid  if myadult_marriedwkid==1 &  myadult_rel==1;
*** example again, big family id suggests not related but the relhd says are, go with bfamid;
list hhid p_age p_relhd f_type bfamid year p_famrel p_marst sfamid bfamid myadult_alonewkid myadult_rel bf_numpers sf_numpers if hhid==1348679, nolab;
** go with bfamid;
*** married with kid wins over living with relatives;
replace myadult_rel=0 if myadult_marriedwkid==1;
*** not mutually exclusive with par, 7 obs in ca, all natural adopted child;
tab year if myadult_par==1 & myadult_rel==1;
*DC changes from year>=1989 to year>=1988; 
tab p_relhd if myadult_par==1 & myadult_rel==1 & year>=1988;
*DC ends;
list hhid  if myadult_par==1 &  myadult_rel==1;
*** example, here parent is 24, people related to parent are 18 and 25, go with relative;
*** later maybe impose year difference with parent;
list hhid p_age p_relhd f_type bfamid year p_famrel p_marst sfamid bfamid myadult_par myadult_rel bf_numpers sf_numpers if hhid==1916069, nolab;
*** go with relative;
*** living with relatives wins over living with parents;
replace myadult_par=0 if myadult_rel==1;

*** final check of all flags split by myadult_rel;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par myadult_rel if myadult_rel==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par myadult_rel if myadult_rel==0;

drop temp1 temp2;


*** Rebuild the scratch flags, temp1 marks relatives (p_famrel==4) in a small family with a young adult head;
*** temp2 counts those relatives within each sfamid;
gen temp1 = sf_headyadult==1 & p_famrel==4;
bysort sfamid: egen temp2=total(temp1);
tab f_kind temp2, missing;
*** NOTE(review): this sets yadult_rel, not myadult_rel, confirm that a separate yadult_rel variable is intended;
replace yadult_rel = 1 if sfheadyadult==1 & temp2>0 & yadult==1;      /*are relatives of young sf head present in sf?*/
*** check age;
*** temp1 is a 0/1 indicator and is never missing, so the old condition temp1<. summarized every record;
*** restrict the age check to the flagged relatives;
su p_age if temp1==1;

su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par myadult_rel if myadult_rel==1;
su myadult_aloneno myadult_alonew myadult_marriednokid myadult_marriedwkid myadult_par myadult_rel if myadult_rel==0;
*** NOTE(review): temp1 and temp2 are not dropped after this point and so remain in the saved file;


***********************************;
*** check not any duplication *****;
*** Diagnostics only, nothing in this section changes the data;
*** Each line cross tabulates one living arrangement flag against each of the others;
*** Short names such as myadult_aloneno rely on Stata variable name abbreviation;
***********************************;

for any alonewkid marriednokid marriedwkid par rel: tab myadult_aloneno myadult_X;
for any alonenok marriednokid marriedwkid par rel: tab myadult_alonewkid myadult_X;
for any alonenok alonewk marriedwkid par rel: tab myadult_marriedno myadult_X;
for any alonenok alonewk marriednokid par rel: tab myadult_marriedw myadult_X;
for any alonenok alonewk marriednokid marriedwkid rel: tab myadult_par myadult_X;
for any alonenok alonewk marriednokid marriedwkid par: tab myadult_rel myadult_X;


*** tab parent line, alone no kid, mutually exclusive;
*** parent line, alone wkid 10 obs in  CA;
*** married no kid mutually exclusive;
*** married w kid 2 obs in 2 years in CA;
*** bunches of obs with parent line not missing and live with par not 1;
*** some with parent line not missing and live with other relative; 
for any alonenok alonewkid marriednokid marriedwkid par rel: tab p_parentline myadult_X;
*** look into live with parent issue; 
*** small number of spouses, a bunch of relatives of head who are reference persons, but bulk are brother sister of head;
*** but parent is in house;
tab p_relhd p_famrel if p_parentline!=0 & p_parentline<.  & myadult_par==0;

*DC changes from year>=1989 to year>=1988;
*** if brother/sister of head and that person has a parent, will identify that person as living with relatives;
*** NOTE(review): unlike the tab above, this one does not exclude missing p_parentline, confirm that is intended;
tab p_relhd myadult_rel if p_parentline!=0 & myadult_par==0 & yadult==1 & year>=1988;

*DC ends here;

****************************************;
** Doubling up/Female head             *;
****************************************;
** only a household concept;
*** Every h_ variable built here is a household level flag, computed by(hhid) and stored on every person in the HH;

** Concept 1, a related subfamily with a kid;
*** relsfunder18 is under18 for persons with f_type==3 and 0 otherwise;
gen relsfunder18 = under18 * (f_type==3);
tab f_type relsfunder18;

egen h_relsubfwkid = max(relsfunder18), by(hhid);
tab h_relsubfwkid relsfunder18 if f_type==3;
drop relsfunder18;
** show can't get this unless there is a family for the householder;
*** h_relsubfwkid=0 if f_type==2;
tab h_relsubfwkid f_type;
label variable h_relsubfwkid "Related subfamily with a kid in HH";

** concept 2, any subfamily with a kid;
*** same construction as concept 1 but accepting f_type 3 or 4;
gen anysfunder18 = under18 * (f_type==3 | f_type==4);
tab f_type anysfunder18;
egen h_anysubfwkid = max(anysfunder18), by(hhid);
tab h_anysubfwkid anysfunder18 if f_type==3;
tab h_anysubfwkid anysfunder18 if f_type==4;
drop anysfunder18;
*** could get this with unrelated individual HHER;
tab h_anysubfwkid f_type if h_relsubfwkid==0;
label variable h_anysubfwkid "Any subfamily with a kid in the HH";

** concept 3, any subfamily with a kid and primary family has a kid;
gen anyprimaryfunder18 = under18 * (f_type==1);
tab f_type anyprimaryfunder18;
egen h_anyprimaryfwkid = max(anyprimaryfunder18), by(hhid);
*** product of the two HH flags, 1 only when both the primary family and some subfamily have a kid;
gen h_anypfsubfwkid = h_anyprimaryfwkid * h_anysubfwkid;
tab h_anypfsubfwkid h_anyprimaryfwkid;
drop anyprimaryfunder18 h_anyprimaryfwkid;
label variable h_anypfsubfwkid "Primary and subfamily both have a kid in HH";

** concept 4, HH, more than 1 family and at least 1 kid;
*** egen total() replaces the legacy name sum(), same result and consistent with the rest of this file;
*** h_numsf counts subfamily heads (f_type 3 or 4) and h_anypf counts primary family heads in the HH;
egen h_numsf = total((f_type==3 & sfhead==1)|(f_type==4 & sfhead==1)), by(hhid);
egen h_anypf = total(f_type==1 & head==1), by(hhid);
*** fixed per Hilary comment;
*** h_anykidinfam counts persons with under18==1 who are in a family unit (f_type 1, 3 or 4);
egen h_anykidinfam = total((f_type==1 & under18==1)|(f_type==3 & under18==1)|(f_type==4 & under18==1)), by(hhid);
*** fixed per Hilary comment;
gen h_gt1sfamwkid = (h_numsf + h_anypf >=2) & h_anykidinfam>=1;
tab h_gt1sfamwkid h_numsf, missing;
tab h_gt1sfamwkid h_anypf, missing;
tab h_gt1sfamwkid h_anykidinfam;
gen tmp = h_numsf + h_anypf;
tab h_gt1sfamwkid tmp, missing;
*** fixed per Hilary comment;
tab h_gt1sfamwkid tmp if h_anykidinfam >=1, missing;
tab h_gt1sfamwkid tmp if h_anykidinfam ==0, missing;
drop h_anypf h_numsf tmp h_anykidinfam;
*** label now states the kid requirement that the variable imposes;
label variable h_gt1sfamwkid "More than 1 family unit and at least 1 kid in HH";


** concept 5, HH, with any small family with a female head and a kid;
** Hilary wants to also have it be an unmarried woman, include separated;
*** MB also imposes, diff in ages must be 14 or larger;
*** reference person for a family and female and *** Hilary addition **** unmarried/separated;
*** also MB adds NW/lowed;
*** 10/10 adding indicator for woman who is small family female unmarried/separated head with a kid;

*** tmp flags the candidate woman, p_famrel==1, p_male==0 and p_marst between 4 and 7;
*** tmpage holds her age and is missing for everyone else;
gen tmp = p_famrel==1 & p_male==0 & p_marst>=4 & p_marst<=7;
gen tmpage = p_age if p_famrel==1 & p_male==0 & p_marst>=4  & p_marst<=7;
** not a white non-hispanic;
gen tmpnw = p_famrel==1 & p_male==0 & p_marst>=4 & p_marst<=7 & white!=1;
gen tmpagenw = p_age if p_famrel==1 & p_male==0 & p_marst>=4 & p_marst<=7 & white!=1;
** hsdo or hsgrad;
gen tmpled = p_famrel==1 & p_male==0 & p_marst>=4 & p_marst<=7 & (lths==1 | hsgrad==1);
gen tmpageled = p_age if p_famrel==1 & p_male==0 & p_marst>=4 & p_marst<=7 & (lths==1 | hsgrad==1);


*** spread the flags and the ages to every member of the small family;
egen sftmp = max(tmp), by(sfamid);
egen sftmpnw = max(tmpnw), by(sfamid);
egen sftmpled = max(tmpled), by(sfamid);

egen sftmpage = max(tmpage), by(sfamid);
egen sftmpagenw = max(tmpagenw), by(sfamid);
egen sftmpageled = max(tmpageled), by(sfamid);

*** some other kid in this same family;
*** This may not impose that it's my kid, but hopefully it will for most;
*** sftmpage2 is the age of the oldest such kid in the small family;
gen tmp2 = p_famrel!=1 & p_famrel!=0 & p_age<18;
gen tmpage2 = p_age if p_famrel!=1 & p_famrel!=0 & p_age<18;
egen sftmp2 = max(tmp2), by(sfamid);
egen sftmpage2 = max(tmpage2), by(sfamid);

*** see how often it might be age inappropriate to be my kid by means;
su p_age if tmp==1 & sftmp2==1;
su p_age if tmp2==1 & sftmp==1;
*** age gap between the woman and the oldest kid, missing when either one is absent;
gen sftmpdiffage = sftmpage -sftmpage2;
gen sftmpdiffagenw = sftmpagenw -sftmpage2;
gen sftmpdiffageled = sftmpageled -sftmpage2;
su sftmpdiffage*, d;

*** new 10/10;
*** woman herself is a female sfhead;
*** the <. test keeps a missing age gap from passing the >=14 test;
gen femsfhead = 1 if tmp==1 & sftmp2==1 & sftmpdiffage>=14 & sftmpdiffage<.;
replace femsfhead=0 if femsfhead==.; 
tab femsfhead p_relhd, missing;
tab p_age femsfhead, missing;
tab sftmpdiffage if femsfhead==1;

** HH contains a female sfhead;
** impose constraint that sftmp==1 & sftmp2==1 and sftmpdiff>=14;
gen tmp3 = sftmp==1 & sftmp2==1 & sftmpdiffage>=14 & sftmpdiffage<.;
gen tmp3nw = sftmpnw==1 & sftmp2==1 & sftmpdiffagenw>=14 & sftmpdiffagenw<.;
gen tmp3led = sftmpled==1 & sftmp2==1 & sftmpdiffageled>=14 & sftmpdiffageled<.;
egen h_anyfemsfhead = max(tmp3), by(hhid);
egen h_anynwfemsfhead = max(tmp3nw), by(hhid);
egen h_anyledfemsfhead = max(tmp3led), by(hhid);

*** new 10/10;
*** for comparison with femsfhead;
*** married woman in a family with a child at least 14 years younger;
*** need not be a head;
gen tmpmar = p_male==0 & p_famrel!=0 & p_marst<=3 & p_age>=15;
gen tmpagemar = p_age if p_male==0 & p_famrel!=0 & p_marst<=3 & p_age>=15;
egen sftmpmar = max(tmpmar), by(sfamid);
egen sftmpagemar = max(tmpagemar), by(sfamid);
gen sftmpmardiffage = sftmpagemar -sftmpage2;
gen femsfmar = 1 if tmpmar==1 & sftmp2==1 & sftmpmardiffage>=14 & sftmpmardiffage<.;
replace femsfmar=0 if femsfmar==.; 
tab femsfmar f_kind, missing;
tab p_age femsfmar, missing;
*** tabulate the age gap that defines femsfmar, the married woman gap, parallel to the femsfhead check above;
tab sftmpmardiffage if femsfmar==1;

*** could get same kid as both;
*** hope not often;
*** can't get same woman;
tab femsfhead femsfmar;
tab sftmpmar sftmp if sftmp2==1 & sftmpmardiffage>=14 & sftmpdiffage>=14;
tab tmp2 if sftmp==1 & sftmpmar==1 & sftmpmardiffage>=14 & sftmpdiffage>=14;
*pause;


label variable femsfhead "Woman is unmarried/separated small family head with one kid under 18";
label variable femsfmar "Woman is married in a small family with one kid under 18";

label variable h_anyfemsfhead "HH has at least one small family with an unmarried/separated female head and at least one other kid under 18";
label variable h_anynwfemsfhead "HH has at least one small family with a non-white or Hispanic unmarried/separated female head and at least one other kid under 18";
label variable h_anyledfemsfhead "HH has at least one small family with an unmarried/separated female head with <=12 years ed and at least one other kid under 18";

tab h_anyfemsfhead sftmp;
tab h_anyfemsfhead sftmp2;

tab h_anyfemsfhead, su(sftmpdiffage);
tab h_anyfemsfhead tmp3;
*** Note in next tab, can be 1s in any cell, as could be in another SF in my HH;
*** but vast bulk should be in 1/1;
tab sftmp sftmp2, su(h_anyfemsfhead);
tab h_anyfemsfhead sfhead if p_male==0;
tab h_anyfemsfhead sfhead if p_male==1;
*** NOTE(review): sftmpmar, sftmpagemar and sftmpmardiffage are not dropped anywhere below and stay in the saved file;
drop tmp tmp2 tmp3 sftmp sftmp2 sftmpage sftmpage2 sftmpdiffage;
drop tmpnw tmp3nw sftmpnw sftmpagenw sftmpdiffagenw;
drop tmpled tmp3led sftmpled sftmpageled sftmpdiffageled;


** check means;
*** Diagnostics on the seven doubling up and female head HH flags, overall and by year;
su h_relsubfwkid h_anysubfwkid h_anypfsubfwkid h_gt1sfamwkid h_anyfemsfhead h_anynwfemsfhead h_anyledfemsfhead;
for any relsubfwkid anysubfwkid anypfsubfwkid gt1sfamwkid anyfemsfhead anynwfemsfhead anyledfemsfhead: tab year h_X;



*** check how many HH with an unrelated individual kid or a householder kid;
*** in CA 0.59% of records;
*** tmp flags a person under 18 with f_type 2 or 5, htmp is 1 for every member of a HH containing one;
gen tmp = p_age<18 & (f_type==2 | f_type==5);
egen htmp = max(tmp), by(hhid);
tab htmp;

** HH with some family unit and a kid;
** In CA 61%;
*** tmp2 flags a person under 18 with f_type 1, 3 or 4, htmp2 is the HH level version;
gen tmp2 = p_age<18 & (f_type==1|f_type==3 | f_type==4);
egen htmp2 = max(tmp2), by(hhid);
tab htmp2;

*** how many with an unrelated individual kid or hher kid and no other kid;
*** about half;
tab htmp htmp2;

*** check if any of these are our measures of interest;
for any relsubfwkid anysubfwkid anypfsubfwkid gt1sfamwkid anyfemsfhead: tab htmp h_X if htmp2==1;
for any relsubfwkid anysubfwkid anypfsubfwkid gt1sfamwkid anyfemsfhead: tab htmp h_X if htmp2==0;

*** drop;
*** wildcard drop, removes every variable whose name starts with htmp or tmp;
*** this also removes tmpage, tmpagenw, tmpageled, tmpage2, tmpmar and tmpagemar left over from concept 5;
drop htmp* tmp*;
*pause on;
*pause;


*********************************;
** More drops                  **;
*********************************;
*** one drop statement continued over two lines, r_p_inc_flag* is a wildcard;
drop h_mcare h_champ h_hhiyn one sf_elderly countbigf ak hi f_headinx f_rsublowinc p_educ r_p_inc_flag*
	p_gradecom p_stat p_ethnicity p_care p_cov_hi elderly h_elderly bf_elderly calyear incyear  ;



**********************************;
* Multiple program participation *;
**********************************;
*** Household level indicators for receiving combinations of transfer programs;
*** The if qualifiers leave an indicator missing when a required input is missing;

*** food stamps or free/reduced lunch;
gen h_anyfoodtrans = h_foodstp | h_freelun if h_foodstp<. & h_freelun<.;
*** food stamps, free/reduced lunch or WIC;
*** the guard must be h_wicyn<. (WIC nonmissing), the old guard h_wicyn==1 made this 1 wherever it was defined;
*** and missing for every HH without WIC;
gen h_anyfoodtrans2 = h_foodstp | h_freelun | h_wicyn==1 if h_foodstp<. & h_freelun<. & h_wicyn<.;
*** food stamps together with public assistance income;
gen h_fswelf = (r_h_inc_pa>0 & h_foodstp==1) if r_h_inc_pa<. & h_foodstp<.;
*** food stamps together with public assistance or SSI income;
gen h_fswelfssi = ((r_h_inc_pa>0|r_h_inc_sp>0) & h_foodstp==1) if r_h_inc_pa<. & h_foodstp<. & r_h_inc_sp<.;
*** any safety net program, variants without and with Medicaid and without and with WIC (the 2 versions);
*** NOTE(review): r_h_inc_pa>0 and r_h_inc_sp>0 are true when the income is missing, since Stata missing;
*** compares larger than any number, and only h_foodstp or h_wicyn is guarded here, confirm incomes are never missing;
gen h_saftynocaid  = h_foodstp==1 | h_freelun==1 | (r_h_inc_pa>0) | r_h_inc_sp>0 | h_public==1 | h_rentsub==1 |h_enrgyas==1 if h_foodstp<.;
gen h_safty2nocaid = h_foodstp==1 | h_freelun==1 | (r_h_inc_pa>0) | r_h_inc_sp>0 | h_public==1 | h_rentsub==1 |h_enrgyas==1| h_wicyn==1 if h_wicyn<.;
gen h_saftywcaid   = h_foodstp==1 | h_freelun==1 | (r_h_inc_pa>0) | r_h_inc_sp>0 | h_public==1 | h_rentsub==1| h_enrgyas==1| h_anymedicaid==1 if h_foodstp<.;
gen h_safty2wcaid  = h_foodstp==1 | h_freelun==1 | (r_h_inc_pa>0) | r_h_inc_sp>0 | h_public==1 | h_rentsub==1 | h_enrgyas==1 |h_wicyn==1 | h_anymedicaid==1 if h_wicyn<.;

*EK adds this part from the marchcpsfamilyhh_immig.do file (1/30/13);
*** add 6/11;
*** new variable safety net no medicaid no public assistance;
*** to do: move to march program;
*** NOTE(review): this line uses h_inc_sp while the lines above use r_h_inc_sp, and it has no missing value guard;
*** left as written, confirm against marchcpsfamilyhh_immig.do;
gen h_saftynocaidnopa = (h_foodstp==1 | h_freelun==1 | h_inc_sp>0 | h_public==1 | h_rentsub==1 | h_enrgyas==1);
*** Recall: h_saftywcaid: any safety net (includes medicaid and PA), also food stamps, free/reduced lunch, section 8 or housing, LIHEAP, SSI;
*EK ends;

*** public housing or rent subsidy;
gen h_pubsec8 = h_public | h_rentsub if h_public<. & h_rentsub<.;

*EK adds h_saftynocaidnopa as in the marchcpsfamilyhh_immig.do file (1/30/13);
*** mean of each indicator by year;
for any h_anyfoodtrans h_anyfoodtrans2 h_fswelf h_fswelfssi h_saftynocaid h_safty2nocaid 
	h_safty2wcaid h_saftywcaid h_pubsec8 h_saftynocaidnopa: tab year, su(X);



***********************************************************************;
*Education distribution, Emp-pop ratios, hours, and earnings by group *;
***********************************************************************;
*** Person level work measures built from p_weeks and p_ushrs;
*** NOTE(review): Stata treats missing as larger than any number, so employed and fulltime are 1 when;
*** p_weeks (and p_ushrs) are missing, confirm these inputs are never missing;
gen employed = p_weeks>0; 
gen fulltime = p_weeks>=50 & p_ushrs>=35; 
gen annualhrs = p_ushrs*p_weeks;

*** For each measure make four copies, one per education group (lths, hsgrad, somecol, colgrad);
*** each copy is missing for persons outside that group;
foreach n in employed fulltime annualhrs r_p_inc_ws {;
  gen `n'_lh = `n' if lths==1;
  gen `n'_hg = `n' if hsgrad==1;
  gen `n'_sc = `n' if somecol==1;
  gen `n'_cg = `n' if colgrad==1;
};

*** wage and salary income per annual hour, missing when annualhrs is 0 or missing;
gen hrwage = r_p_inc_ws/annualhrs;

**** Removed sample selection;

*Puerto Rico*;
*** NOTE(review): drops statefip 43, presumably the code this file uses for Puerto Rico, confirm against the statefip coding;
drop if statefip==43; 

*** keep stuff for merging possibly;
*** h_pos h_seq p_pos p_seq; 
**** new drop statements added because so much data;
*drop h_numfam f_seq f_pos p_sex p_race p_ch_mc p_ch_hi; /*EK comments this out since Hilary asked so*/;

*** comment out for now running 89-13;
****append using marchcps7788_recode;

*do gen_cmsa;

** some other recodes;
** household;
*** education dummies for the HH head, one per h_edcat value 1 to 4;
gen h_lths    = (h_edcat==1);
gen h_hsgrad  = (h_edcat==2);
gen h_somecol = (h_edcat==3);
gen h_colgrad = (h_edcat==4);
*** check, the four dummies add up to 1 whenever h_edcat is one of 1 to 4;
gen t = h_lths + h_hsgrad + h_somecol + h_colgrad;
tab t, missing;
tab t noheadhh;
drop t;


*** age bands for the HH head, a missing h_age gives 0 in every band;
gen h_agele24   = (h_age<=24);
gen h_age2534   = inrange(h_age,25,34);
gen h_age3544   = inrange(h_age,35,44);
gen h_age4564   = inrange(h_age,45,64);
gen h_age65plus = (h_age>=65 & !missing(h_age));
gen t = h_agele24 + h_age2534 + h_age3544 + h_age4564 + h_age65plus;
tab t;
drop t;

*DC changes from year>=1989 to year>=1988;
*** marital status of the HH head, the h_marst cutoffs depend on the period;
*** year<=1988 uses h_marst==8, 5 to 7, and <=4, all other years use h_marst==7, 4 to 6, and <=3;
gen h_nevmar    = cond(year<=1988, h_marst==8, h_marst==7);
gen h_sepwiddiv = cond(year<=1988, inrange(h_marst,5,7), inrange(h_marst,4,6));
gen h_married   = cond(year<=1988, h_marst<=4, h_marst<=3);
gen t= h_nevmar + h_sepwiddiv + h_married;
tab t;
drop t;

**** fix h_wgt p_marwt, other weights for implied decimal places 1988 and earlier;
**** Note h_wgt, sf_wgt, bf_wgt, and cf_wgt are weights of relevant family/HH head;
**** h_weight is CPS HH weight;
**** p_marwt is march CPS person weight, and p_bwt is basic CPS weight;
**** f_famwgt is CPS family weight;
**** p_bwt is not in data after 1988;
**** CF* dropped;
*** divide each weight by 100 for year<=1988 to remove the two implied decimal places;
for any h_weight p_marwt f_famwgt h_wgt sf_wgt bf_wgt : replace X = X/100 if year<=1988;
label variable h_weight "CPS household weight";
label variable p_marwt "CPS march person weight";
*label variable p_bwt "CPS basic monthly weight, not in after 1988";
label variable f_famwgt "CPS family weight";
*** labels below said height, these variables are weights;
label variable h_wgt "March weight of our HH head";
label variable sf_wgt "March weight of our small family head";
label variable bf_wgt "March weight of our big family head";
*label variable cf_wgt "March weight of Census family head";


*** Global macro demog holds the list of HH head demographic variables;
*** The su line gives weighted means over HH heads (head==1) excluding records with hprobthreshold==1;
global demog = "h_lths h_hsgrad h_somecol h_colgrad h_male h_white h_hisp h_hispdkrf h_black h_other  h_age  h_agele24 h_age2534 h_age3544 h_age4564 h_age65plus h_nevmar h_sepwiddiv h_married";
su $demog if head==1 & hprobthreshold!=1 [aw=h_wgt];
label variable h_lths "HH head HS dropout";
label variable h_hsgrad "HH head HS graduate";
label variable h_somecol "HH head has some college";
label variable h_colgrad "HH head is college graduate";
label variable h_male "HH head is male";
label variable h_white "HH head is white non-Hispanic";
label variable h_hisp "HH head is Hispanic";
label variable h_hispdkrf "HH head DK/RF Hispanic";
label variable h_black "HH head is black non-Hispanic";
label variable h_other "HH head is other non-Hispanic race";
label variable h_age "HH head age";
label variable h_agele24 "HH head is 24 or under";
label variable h_age2534 "HH head is 25-34";
label variable h_age3544 "HH head is 35-44";
label variable h_age4564 "HH head is 45-64";
label variable h_age65plus "HH head is 65 plus";
label variable h_nevmar "HH head is never married";
label variable h_sepwiddiv "HH head is sep/wid/div";
label variable h_married "HH head is married now";

*** weighted mean of each demographic variable by year, same sample restriction as the su above;
for any $demog : tab year  if head==1 & hprobthreshold!=1 [aw=h_wgt], su(X);

*** recipiency;
*** vet = uc + wc + vet;
*** asset is rnt + div + int;
*** cs is CSP + ALM + other after 1989;
*** h_anyinc_X is 1 when the matching r_h_inc_X is not zero, so negative amounts count as recipiency;
*** NOTE(review): a missing r_h_inc_X also gives 1 because missing is not equal to 0, confirm incomes are never missing;
for any ws se fr vet asset othpr sp ss pa dis ret earn oth : gen h_anyinc_X = r_h_inc_X !=0;
 label variable h_anyinc_ws "HH income from wage/salary ";
 label variable h_anyinc_se "HH income from self-employment";
 label variable h_anyinc_fr "HH income from farm";
 label variable h_anyinc_vet "HH income from WC/UI/veteran's";
 label variable h_anyinc_asset "HH income from rent/dividends/interest";
 label variable h_anyinc_othpr "HH income from child support/alimony/other";
 label variable h_anyinc_sp "HH income from SSI";
 label variable h_anyinc_ss "HH income from social security";
 label variable h_anyinc_pa "HH income from public assistance";
 label variable h_anyinc_dis "HH income from disability";
 label variable h_anyinc_ret "HH income from retirement";
 label variable h_anyinc_earn "HH income from earnings";
 label variable h_anyinc_oth "HH income from non-earnings";

*** labels for the real (2009 dollar) HH income amounts;
 label variable r_h_inc_tot "HH total money income in 2009 $";
 label variable r_h_inc_ws "HH income from wage/salary  in 2009 $";
 label variable r_h_inc_se "HH income from self-employment in 2009 $";
 label variable r_h_inc_fr "HH income from farm in 2009 $";
 label variable r_h_inc_vet "HH income from WC/UI/veteran's in 2009 $";
 label variable r_h_inc_asset "HH income from rent/dividends/interest in 2009 $";
 label variable r_h_inc_othpr "HH income from child support/alimony/other in 2009 $";
 label variable r_h_inc_sp "HH income from SSI in 2009 $";
 label variable r_h_inc_ss "HH income from social security in 2009 $";
 label variable r_h_inc_pa "HH income from public assistance in 2009 $";
 label variable r_h_inc_dis "HH income from disability in 2009 $";
 label variable r_h_inc_ret "HH income from retirement in 2009 $";
 label variable r_h_inc_earn "HH income from earnings in 2009 $";
 label variable r_h_inc_oth "HH income from non-earnings in 2009 $";

*** Global macro outcomes holds the list of HH level outcome variables;
*** The su line gives weighted means over HH heads (head==1) excluding records with hprobthreshold==1;
global outcomes "hpovlt50 hpov50100 hbelowpov hpov100200 halt1povlt50 halt1belowpov halt2povlt50 halt2belowpov  halt1etpovlt50 halt1etbelowpov halt2etpovlt50 halt2etbelowpov h_anymedicaid h_anypublichi h_anyanyhi1 h_anyanyhi2 h_anynohi1 h_anynohi2 h_anykidpub h_anykidanyhi1 h_anykidanyhi2 h_anykidnohi1 h_anykidnohi2 h_own h_public h_rentsub h_foodstp h_freelun h_enrgyas h_relsubfwkid h_anysubfwkid h_anypfsubfwkid h_gt1sfamwkid h_anyfemsfhead r_h_inc_ws r_h_inc_pa r_h_inc_sp h_hfairpoor h_hexcgood h_anyinc_ws h_anyinc_se h_anyinc_fr h_anyinc_vet h_anyinc_asset  h_anyinc_othpr  h_anyinc_sp h_anyinc_ss h_anyinc_pa h_anyinc_dis h_anyinc_ret h_anyinc_earn h_anyinc_oth h_wicyn h_tanftranscc h_tanftred h_pubsec8 h_fswelf h_fswelfssi h_saftynocaid h_saftywcaid";
su $outcomes if head==1 & hprobthreshold!=1 [aw=h_wgt];
label variable hpovlt50 "HH income <50% of poverty threshold";
label variable hpov50100 "HH income in 50-100% of poverty threshold";
label variable hbelowpov "HH income <100% of poverty threshold";
label variable hpov100200 "HH income in 100-200% of poverty threshold";
*** label spelling corrected, was Mediciad;
label variable h_anymedicaid "Someone in HH on Medicaid";
label variable h_anypublichi "Someone in HH on Medicaid/Medicare/Military/CHIP post 01";
label variable h_anyanyhi1 "Someone in HH on private/Medicaid/Medicare/Military/CHIP 01 HI";
label variable h_anyanyhi2 "Someone in HH on group HI/Medicaid/Medicare/Military/CHIP 01 HI";
label variable h_anynohi1 "Someone in HH without HI anyhi1 concept ";
label variable h_anynohi2 "Someone in HH  without HI anyhi2 concept";
label variable h_anykidpub "Kid in HH on Medicaid/Medicare/Military/CHIP post 01";
label variable h_anykidanyhi1 "Kid in HH on private/Medicaid/Medicare/Military/CHIP 01 HI ";
label variable h_anykidanyhi2 "Kid in HH on group/Medicaid/Medicare/Military/CHIP 01 HI ";
label variable h_anykidnohi1 "Kid in HH without HI anyhi1 concept";
label variable h_anykidnohi2 "Kid in HH without HI anyhi2 concept";
label variable h_own "HH dwelling owned";
label variable h_public "HH in public housing ";
label variable h_rentsub "HH got rent subsidy from government";
label variable h_foodstp "Someone in HH on Food Stamps last year ";
label variable r_h_fsval "Value of HH Food Stamps in 2009 $";
label variable h_freelun "Some kid 5-18 in HH on free/reduced school lunch";
label variable h_enrgyas "Someone in HH got LIHEAP";
label variable r_h_enrgyva "Value of HH LIHEAP Oct - Dec, in 2009$";
label variable h_hfairpoor "HH had someone with fair/poor health";
label variable h_hexcgood "HH had someone with good/excellent health";
label variable h_wicyn "Someone in HH on WIC ";
label variable h_tanftranscc "Someone in HH got transportation ass. or did community service (non-cash TANF) ";
label variable h_tanftred "Someone in HH got job training/readiness/club, or went to GED classes (non-cash TANF)";
label variable h_pubsec8 "HH is in public housing or has subsidized rent";
label variable h_fswelf "Someone in HH on FS and cash welfare ";
label variable h_fswelfssi "Someone in HH on FS and either cash welfare or SSI ";
label variable h_saftynocaid "Someone in HH on FS/School lunch/cash welf/SSI/public housing/section 8/LIHEAP";
label variable h_saftywcaid "Someone in HH on FS/School lunch/cash welf/SSI/public housing/section 8/LIHEAP/Medicaid";
label variable halt1povlt50 "HH alt1 income <50% of poverty threshold";
label variable halt1belowpov "HH alt1 income <100% of poverty threshold";
label variable halt2povlt50 "HH alt2 income <50% of poverty threshold";
label variable halt2belowpov "HH alt2 income <100% of poverty threshold";
label variable halt1etpovlt50 "HH alt1 income <50% of exp. poverty threshold";
label variable halt1etbelowpov "HH alt1 income <100% of exp. poverty threshold";
label variable halt2etpovlt50 "HH alt2 income <50% of exp. poverty threshold";
label variable halt2etbelowpov "HH alt2 income <100% of exp. poverty threshold";


*** weighted mean of each outcome by year, same sample restriction as the su above;
for any $outcomes : tab year if head==1 & hprobthreshold!=1 [aw=h_wgt], su(X);


*** master drop;
*** for now, drop cf variables (commented out above);
*** 7788 only;
*** p_famnum one h_fsflag ;
drop division p_chip p_hi_yn;

** not sure why but h_under18 is missing;
*** rebuild h_under18 as the HH level maximum of under18, stored on every person in the HH;
egen h_under18=max(under18), by(hhid);

***** Now fix ID variables so they are unique even if we stack 80-88, 89-20XX;
*** adds 1000000 to each ID, with summaries before and after to check the shift;
*** NOTE(review): if an ID is stored as float, values above about 16.7 million lose precision, confirm storage types;
su hhid sfamid bfamid;
for any hhid sfamid bfamid: replace X = X+1000000;
su hhid sfamid bfamid;

*Final save;
*** the file name uses the loop macro y, set by the enclosing loop (opening not shown here);
	compress;
		save "./marcps88on/marcps`y'_recode.dta", replace;
	
*Erase intermediate file;	
*** cap suppresses the error if the taxsim file for this y does not exist;
cap erase "./marcps88on/temp/taxsim`y'.dta";
} ;
*** debugging;
** save small8913_recode, replace;
*** shell escape to print the system date in the log, then close the log, cap ignores the error if none is open;
!date;
cap log close;
