
** Build a dataset pairing individual characteristics in month m with labor market status in month m+1
capture log close
log using ../logs/07_cpslookahead.log, text replace

clear

local scratch "../scratch"


*****************************************************************************************
*** Make a CPS panel
*****************************************************************************************
* Load the combined CPS extract, keeping only rows with hrintsta==1 (presumably completed
* interviews -- confirm against the CPS data dictionary) for people aged 16 and over.
* The analysis targets February-June 2020; the filter keeps every 2020 month present in the input.
* To return to the longer window, replace year==2020 with year>2017.
use if hrintsta==1 & prtage>=16 & year==2020 using `scratch'/06_combinecps.dta, clear

* Monthly date variable built from the survey year and month
generate yearmo = ym(hryear4, hrmonth)
format %tm yearmo
 
***************Step 2 -- Creating Links ***************
* Note: Step 2 and Step 3 were extracted from CREATEBIGCPSPANEL.DO by Jesse Rothstein, 6/3/2011 and based on create_simpleid.do, by Ana Rocca, 5/24/2011
display "****Cleaning longitudinal links***"

* Working copy of month-in-sample, used only while the links are being built
generate byte newmis = hrmis

* The household identifier is split across two CPS variables; glue them into one string
egen hhid = concat(hrhhid hrhhid2)
drop hrhhid hrhhid2

* Raw person key = state FIPS code + household id + pulineno
* NOTE(review): pulineno is presumably the person's line number within the household,
* not a phone number -- confirm against the CPS data dictionary.
egen personid = concat(gestfips hhid pulineno)
egen double personnum1 = group(personid)

* newpers marks the start of a spell: the first record of a raw person key, or any record
* where the calendar month or the month-in-sample does not advance by exactly one.
sort personnum1 yearmo
by personnum1: generate newpers = (_n==1) | (yearmo~=yearmo[_n-1]+1) | (newmis~=newmis[_n-1]+1)
 
***************Step 3 -- Checking and Cleaning  links  ***************
* Compare each record with the same raw person's record in the previous month and flag
* sex / race / education / age discrepancies. Every flag is defined only for records that
* continue a spell (newpers~=1); spell-starting records keep missing flags.
* xtset makes L.x the value of x one month (yearmo-1) earlier within personnum1.
  xtset personnum1 yearmo
* sexdif / racedif: 1 when the reported code differs from last month's
  by personnum1: gen sexdif = (pesex~=L.pesex) if newpers~=1 
  by personnum1: gen racedif = (ptdtrace~=L.ptdtrace) if newpers~=1
* redudif: 1 when the education code moved outside the range allowed for last month's code.
* Each line below handles one value of last month's peeduca and lists the current values that
* are NOT acceptable (e.g. after 31 only 31-32 are accepted; after 43 anything >=43 is accepted;
* after 44, 45 or 46 anything >=44 is accepted).
  by personnum1: gen redudif=1 if L.peeduca==31 & (peeduca>32 | peeduca<31) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==32 & (peeduca>33 | peeduca<32) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==33 & (peeduca<33 | peeduca>34) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==34 & (peeduca<34 | peeduca>35) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==35 & (peeduca<35 | peeduca>36) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==36 & (peeduca<36 | peeduca>37) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==37 & (peeduca<37 | peeduca>39) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==38 & (peeduca<38 | peeduca>42) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==39 & (peeduca<39 | peeduca>42) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==40 & (peeduca<40 | peeduca>43) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==41 & (peeduca<41 | peeduca>43) & newpers~=1
* Note the lower bound of 41 (not 42): a move from 42 back to 41 is accepted
       by personnum1: replace redudif=1 if L.peeduca==42 & (peeduca<41 | peeduca>43) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==43 & (peeduca<43) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==44 & (peeduca<44) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==45 & (peeduca<44) & newpers~=1
       by personnum1: replace redudif=1 if L.peeduca==46 & (peeduca<44) & newpers~=1
* Continuing records that were not flagged are consistent (0) ...
       replace redudif=0 if redudif==. & newpers~=1
* ... unless education is missing in either month, in which case the flag is undefined
       by personnum1: replace redudif=. if (L.peeduca==. | peeduca ==.)
* agedif: change in age since last month; ragedif: 1 unless that change is 0, 1 or 2 years
 by personnum1: gen agedif=prtage-L.prtage if newpers~=1
 by personnum1: gen ragedif=(agedif~=0 & agedif~=1 & agedif~=2) if newpers~=1

* Any single discrepancy breaks the link: the record is treated as the start of a new spell
 gen s_r_a_e = (sexdif==1|racedif==1|ragedif==1|redudif==1) if newpers~=1
 replace newpers=1 if s_r_a_e==1

*And now make a well-matched person id
* pnum counts the spells within a raw person key (1 for the first spell, 2 after the first
* break, ...). The final id appends pnum as a single trailing digit to personnum1.
  by personnum1: gen pnum=sum(newpers)
* Guard: with 10 or more spells, 10*personnum1+pnum would spill into the next raw person's id
* range and silently merge two different people, so stop here instead.
  assert pnum<10
  gen double personnum=10*personnum1+pnum
  drop *dif newpers s_r_a_e

* Number the records of each person in month-in-sample order and verify that both the record
* counter and hrmis advance by exactly one from one record to the next.
 sort personnum newmis
 by personnum: gen obsnum=_n
 by personnum: gen nobs=_N
 sort personnum obsnum
 xtset personnum obsnum
 assert obsnum==L.obsnum+1 | obsnum==1
 assert hrmis==L.hrmis+1 | obsnum==1
* firstmo: calendar month of the person's first record
 by personnum: gen firstmo=yearmo[1]
 format firstmo %tm
 format yearmo %tm
 drop newmis 
 
* Drop records whose labor force status (pemlr) is coded -1
drop if pemlr==-1

* Keep a compressed snapshot of the cleaned panel in a temporary file
compress
tempfile cpspanel
save `cpspanel'


*****************************************************************************************
**** Make the look-ahead file
*****************************************************************************************
* One row per person and month is required for the look-ahead to be well defined
sort personnum yearmo
isid personnum yearmo

* validahead==1 when the person's next record is exactly one calendar month and one
* month-in-sample later; it is 0 on the person's last record.
by personnum: generate validahead = (yearmo[_n+1]==yearmo+1) & (hrmis[_n+1]==hrmis+1) & (_n<_N)

*Make look-ahead values
* Fpemlr is next month's pemlr, missing when there is no valid next record
by personnum: generate Fpemlr = pemlr[_n+1] if validahead==1

****Make demographic characteristics

*Education
* Collapse the peeduca codes into four attainment groups, then build one dummy per group
 recode peeduca (31/38=1) (39=2) (40/42=3) (43/46=4), generate(edcat4)
 label define ed4 1 "LTHS" 2 "HS" 3 "S Col" 4 "BA"
 label values edcat4 ed4
* tabulate names the dummies eddum1, eddum2, ... in ascending order of edcat4
 tabulate edcat4 if !missing(edcat4), generate(eddum)
 rename (eddum1 eddum2 eddum3 eddum4) (ed_lths ed_hs ed_scol ed_ba)
 label variable ed_lths "Less than high school"
 label variable ed_hs   "High school"
 label variable ed_scol "Some college"
 label variable ed_ba   "Bachelor"

*Race
* Race dummies come from the detailed race code ptdtrace. A code that appears in none of the
* three lists -- including a missing code, which inlist() never matches -- leaves all three
* dummies at 0 and therefore falls into category 0 ("White") of racecat4.
 gen race_bl=inlist(ptdtrace, 2, 6, 10, 11, 12, 16, 17, 18, 22, 23, 25, 26)
 gen race_as=inlist(ptdtrace, 4, 5, 8, 9, 13, 14, 15, 19, 20, 21, 24)
 gen race_in=inlist(ptdtrace, 3, 7)
* Hispanic when prdthsp is a positive code. Stata orders missing values above every number,
* so a bare prdthsp>0 would also flag missing prdthsp as Hispanic; the <. condition prevents
* that and treats missing like the race dummies do (0).
 gen hispanic=(prdthsp>0 & prdthsp<.)
label var race_as "Asian"
label var race_bl "Black"
label var race_in "Native American"  
label var hispanic "Hispanic"
* The three code lists are disjoint, so the weighted sum yields exactly one category per row
gen racecat4=1*race_bl + 2*race_as + 3*race_in
label def race4 0 "White" 1 "Black" 2 "Asian" 3 "Native American"
label values racecat4 race4

*Age
* Every remaining record must have an age between 16 and 99 (missing ages fail the check)
assert inrange(prtage, 16, 99)
/* 
Earlier alternative age groupings, kept for reference (not executed):
recode prtage (16/24=1) (25/34=2) (35/54=3) (55/64=4) (65/99=5), gen(agecat5)
label def age5 1 "16-24" 2 "25-34" 3 "35-54" 4 "55-64" 5 "65+"
label values agecat5 age5
recode prtage (16/25=1) (26/37=2) (38/49=3) (50/64=4) (65/99=5), gen(agecat5b)
label def age5b 1 "16-25" 2 "26-37" 3 "38-49" 4 "50-64" 5 "65+"
label values agecat5b age5b
gen byte age_1=(prtage>=16 & prtage<=25) if prtage<.
gen byte age_2=(prtage>=26 & prtage<=37) if prtage<.
gen byte age_3=(prtage>=38 & prtage<=49) if prtage<.
gen byte age_4=(prtage>=50 & prtage<=64) if prtage<.
gen byte age_5=(prtage>=65 ) if prtage<.
label var  prtage "Age"
label var age_1  "Age 16-25"
label var age_2  "Age 26-37"
label var age_3  "Age 38-49"
label var age_4  "Age 50-64"
label var age_5  "Age 65 and over"
*/
* Five age bands, then one dummy per band (age_1 ... age_5 in ascending band order)
recode prtage (16/25=1) (26/37=2) (38/49=3) (50/64=4) (65/99=5), generate(agecatHB)
tabulate agecatHB, generate(age_)
label variable age_1 "Age 16-25"
label variable age_2 "Age 26-37"
label variable age_3 "Age 38-49"
label variable age_4 "Age 50-64"
label variable age_5 "Age 65 and over"
 
*Industry
* tabulate creates one indicator per positive primind1 code, named indA1, indA2, ... in
* ascending code order (missing for rows excluded by the if condition). The list below gives
* the suffix for each indicator in that same order, so it must contain exactly 22 names.
 tabulate primind1 if primind1>0, generate(indA)
 local indnames agfor mining const manufd manufnd tradew retail trans util info finance ///
                re proftech mgmt educ health entmt food hhld osvc pubadm army
 local k = 0
 foreach nm of local indnames {
   local ++k
   rename indA`k' indA_`nm'
 }
 
 * Coarser industry code: start from the positive primind1 codes and fold each merged pair
 * into the lower code of the pair (2 into 1, 5 into 4, 9 into 8, 12 into 11, 22 into 21).
 generate indBnum = primind1 if primind1>0
 recode indBnum (2=1) (5=4) (9=8) (12=11) (22=21)
 * Code 22 ("Armed forces") gets no label of its own because it is folded into 21 above.
 label define indBlab ///
        1 "Agriculture and mining" ///
        3 "Construction" ///
        4 "Manufacturing" ///
        6 "Wholesale trade" ///
        7 "Retail trade" ///
        8 "Transp. and util." ///
       10 "Information" ///
       11 "Finance and RE" ///
       13 "Prof., tech services" ///
       14 "Mgmt, adm, waste" ///
       15 "Education services" ///
       16 "Health and social svcs" ///
       17 "Arts, ent, recreation" ///
       18 "Accom, food svcs" ///
       19 "Private households" ///
       20 "Other services" ///
       21 "Public admin"
 label values indBnum indBlab
                   
 * Start the indB_ family as exact copies of the indA_ dummies (indA_xxx -> indB_xxx);
 * substr() strips the five-character "indA_" prefix to recover the suffix.
 foreach src of varlist indA_* {
   local stub = substr("`src'", 6, .)
   clonevar indB_`stub' = `src'
 }

 * Combined categories are the sum of their two component dummies ...
 generate indB_agmin   = indA_agfor  + indA_mining
 generate indB_manuf   = indA_manufd + indA_manufnd
 generate indB_transut = indA_trans  + indA_util
 generate indB_fire    = indA_finance + indA_re
 * ... and the component copies are then removed from the indB_ family
 drop indB_agfor indB_mining indB_manufd indB_manufnd indB_trans indB_util indB_finance indB_re

 label variable indB_const    "Construction"
 label variable indB_tradew   "Wholesale trade"
 label variable indB_retail   "Retail trade"
 label variable indB_info     "Information"
 label variable indB_proftech "Professional and technical services"
 label variable indB_mgmt     "Management, administrative and waste management services"
 label variable indB_educ     "Education services"
 label variable indB_health   "Health care and social services"
 label variable indB_entmt    "Arts, entertainment and recreation"
 label variable indB_food     "Accommodation and food services"
 label variable indB_hhld     "Private households"
 label variable indB_osvc     "Other services, except private households"
 label variable indB_pubadm   "Public administration"
 label variable indB_army     "Armed forces"
 label variable indB_agmin    "Agriculture and mining"
 label variable indB_manuf    "Manufacturing"
 label variable indB_transut  "Transportation and utilities"
 label variable indB_fire     "Finance and real estate"
 
*Presence of a child in the house
* Park the person-level file while household child counts are built from the source extract
 tempfile lookahead
 save `lookahead', replace
  // The working sample was restricted to ages 16+, so children are counted in the source file
  use if hrintsta==1 & year>2017 using `scratch'/06_combinecps.dta, clear
  // Same household key as in the panel: the two parts of the CPS household id glued together
  egen hhid = concat(hrhhid hrhhid2)
  generate yearmo = ym(hryear4, hrmonth)
  format %tm yearmo
  generate child   = (prtage<18)
  generate child10 = (prtage<=10)
  // One row per household and month holding the number of members under 18 / aged 10 or less
  collapse (sum) child child10, by(hhid yearmo)
  tempfile kids
  save `kids'
* Back to the person-level file; every person row must find its household-month (assert),
* and household-months that appear only in the counts file are discarded (keep).
 use `lookahead'
 merge m:1 hhid yearmo using `kids', assert(2 3) keep(3) nogen

 generate byte anychild = (child>0)
 label variable anychild "Presence of a child"
 generate byte anychild10 = (child10>0)
 label variable anychild10 "Presence of a child<=10"
 generate byte married = (pemaritl==1)
 label variable married "Married"
 generate byte female = (pesex==2)
 label variable female "Female"
* manager is left missing when prdtocc1 is missing or not above -1
 generate byte manager = (prdtocc1==1) if prdtocc1>-1 & !missing(prdtocc1)
 label variable manager "Manager"

save `scratch'/07_cpslookahead.dta, replace

log close

