********************************************************************************
* Program:  Collect CPS Data for Disparities Labor Conference
* Notes:    THIS MUST BE RUN ON HAL
********************************************************************************


***** ONLY USE ORG DATA *************************
* Only records with mis equal to 4 or 8 are retained
* (mis is presumably month-in-sample -- confirm against the extract codebook).
keep if inlist(mis, 4, 8)

* The earnings weight has to be present and non-negative.
drop if missing(ernwgt) | ernwgt < 0

* Drop if self-employed.
* Two class variables are consulted, each with its own codes and ym window:
* class2 (codes 5, 6) for 1990m1-1993m12, class3 (codes 6, 7) after 1993m12.
drop if inlist(class2, 5, 6) & ym > tm(1989m12) & ym < tm(1994m1)
drop if inlist(class3, 6, 7) & ym > tm(1993m12)

* Status-specific weights: wgtE / wgtU / wgtN carry ernwgt on records whose
* E / U / N indicator equals 1 and are 0 on every other record.
foreach status in E U N {
	gen wgt`status' = cond(`status' == 1, ernwgt, 0)
}


***** Set up demographic groups ***********************
* Age Groups
* Keep ages strictly above 24 and strictly below 65. Records with a missing
* age fail the upper bound and are removed as well.
keep if age > 24 & age < 65

* Ten-year bands: 1 = 25-34, 2 = 35-44, 3 = 45-54, 4 = 55-64
gen agegroup = .
forvalues band = 1/4 {
	local lower = 15 + 10 * `band'
	local upper = `lower' + 10
	replace agegroup = `band' if age >= `lower' & age < `upper'
}

* Gender groups
* sex is renamed to gendergroup and its codes 1 / 2 are labelled Male / Female.
rename sex gendergroup

* The value label is defined on a single line so that no delimiter switch is
* required. The earlier multi-line form depended on a delimiter directive that
* did not name the semicolon, so the definition could not be parsed as intended.
* replace lets the script be re-run when the label already exists.
label define gendergroups 1 "Male" 2 "Female", replace
label values gendergroup gendergroups


** Race / Ethnicity groups
* Keep an untouched copy of spaneth; it is restored under its own name once
* the Hispanic recode below has been applied.
gen spaneth1 = spaneth

* Fill system-missing spaneth from spaneth2, spaneth3, spaneth4, in that order.
foreach alt of varlist spaneth2 spaneth3 spaneth4 {
	replace spaneth = `alt' if spaneth == .
}

* Hispanic (racegrp 3): the spaneth range that counts depends on the year.
*   1989-2002: codes 1-7   2003-2013: codes 1-5   2014 onward: codes 1-8
replace racegrp = 3 if inrange(spaneth, 1, 7) & inrange(year, 1989, 2002)
replace racegrp = 3 if inrange(spaneth, 1, 5) & inrange(year, 2003, 2013)
replace racegrp = 3 if inrange(spaneth, 1, 8) & year >= 2014

* Discard the filled-in working variable and put the original spaneth back.
drop spaneth
rename spaneth1 spaneth

* Code 5 is folded into code 4, then the variable takes its final name.
replace racegrp = 4 if racegrp == 5
rename racegrp racegroup

* List the distinct race group codes present in the data.
levelsof racegroup, local(rgroups)

* Numeric code of each race group
local nwhite 1
local nblack 2
local nhispanic 3
local nother 4

** Education Groups
* Groups: 1 = HS or less, 2 = Associates, 3 = Bachelors, 4 = Graduate
* (names taken from the value label defined at the end of this section).

* Records that have grdatn: map the attainment code directly.
* Comparisons with an upper bound are already false for missing grdatn; only
* the open-ended >= 44 test needs an explicit missing guard.
gen educgroup = 1 if grdatn <= 38 | inlist(grdatn, 39, 40)
replace educgroup = 2 if inlist(grdatn, 41, 42)
replace educgroup = 3 if grdatn == 43
replace educgroup = 4 if grdatn >= 44 & !missing(grdatn)

* Records without grdatn: fall back on grdhi / grdhi2 combined with grdcom.
* NOTE(review): grdhi and grdhi2 look like two codings of highest grade
* attended (offset by one), with grdcom 1 = completed and 2 = not completed --
* confirm against the codebook.
* Statement order matters: a later replace overrides an earlier one whenever
* grdhi and grdhi2 point a record to different groups.
replace educgroup = 1 if missing(grdatn) & (grdhi<13|grdhi2<12|(grdhi==13 & grdcom==2)|(grdhi2==12 & grdcom==2))
replace educgroup = 1 if missing(grdatn) & ((grdhi==13 & grdcom==1)|(grdhi2==12 & grdcom==1))
replace educgroup = 1 if missing(grdatn) & (grdhi==14|grdhi2==13|(grdhi==15 &grdcom==2)|(grdhi2==14 & grdcom==2))
replace educgroup = 2 if missing(grdatn) & ((grdhi==15 & grdcom==1)|(grdhi2==14 & grdcom==1)|grdhi==16|grdhi2==15|(grdhi==17 & grdcom==2)|(grdhi2==16 & grdcom==2))
replace educgroup = 3 if missing(grdatn) & ((grdhi==17 & grdcom==1)|(grdhi2==16 & grdcom==1))
replace educgroup = 4 if missing(grdatn) & ((grdhi>17 & grdhi<.)|(grdhi2>16 & grdhi2<.))

* Records that could not be assigned to any group are removed.
drop if missing(educgroup)

* replace keeps this from failing with "label already defined" when the label
* exists already (the gendergroups label is defined the same way).
label define educgroups 1 "HS or less" 2 "Associates" 3 "Bachelors" 4 "Graduate", replace
label values educgroup educgroups


*Merge in Utilization Measure data (underutilized = 1)
* The auxiliary do-file runs between preserve and restore, so whatever it does
* to the data in memory is undone before the merge.
* NOTE(review): it presumably writes utildata.dta, which is merged below -- confirm.
preserve
do "${programs_data}/Aux Code/1utilizationdata.do"
restore

* NOTE(review): no keep() option is given, so year/fineoccgroup cells that
* exist only in utildata.dta are added as extra rows -- confirm that is wanted.
merge m:1 year fineoccgroup using "./Data_cleaning/Data/utildata.dta", nogen

* utilgroup is defined only for educgroup 3 or 4 (Bachelors, Graduate):
*   1 when educunderbach is at least .5, 2 when it is below .5.
* Stata treats a missing value as larger than any number, so the >= test needs
* an explicit missing guard. Without it every graduate whose cell has no
* utilization measure would be coded 1; with it their utilgroup stays missing.
gen utilgroup = 1 if inlist(educgroup, 3, 4) & educunderbach >= .5 & !missing(educunderbach)
replace utilgroup = 2 if inlist(educgroup, 3, 4) & educunderbach < .5


***** Set up age/gender/race groups 
do "${programs_data}/Aux Code/1groupdefn.do"

* Trim the data to the group variables (every name ending in "group"), the
* status indicators, the weights, the date fields and the earnings/hours fields.
keep *group E U N month year wgt ernwgt wgtE wgtU wgtN ym ernwk* usualhrs usualhrsf2 usualhrsrng majocc hourlyind

* The extract starts in January 1990 (records with a missing ym are retained).
keep if ym >= tm(1990m1)

* Despite its name, date_ym holds the calendar YEAR of the monthly date ym.
gen date_ym = year(dofm(ym))

save "./Data_cleaning/Data/cps_extract_groups.dta",replace


* Every record contributes its earnings weight to the population total.
generate tot_pop = ernwgt

* Aggregate CPS totals by date_ym. date_ym is year(dofm(ym)), so the result
* has one row per calendar year.
collapse (sum) wgtE wgtU wgtN tot_pop, by(date_ym)

* The yearly weight sum is divided by 12 to express it per month.
replace tot_pop = tot_pop / 12

* Rates in percent:
*   urate_cps  = U / (E + U)
*   lfrate_cps = (E + U) / (E + U + N)
*   erate_cps  = E / (E + U + N)
generate urate_cps = 100 * wgtU / (wgtE + wgtU)
generate lfrate_cps = 100 * (wgtE + wgtU) / (wgtE + wgtU + wgtN)
generate erate_cps = 100 * wgtE / (wgtE + wgtU + wgtN)

* Write the aggregates as a Stata dataset and to the "Overall" sheet of the workbook.
save "./Data_cleaning/Data/actual_cps.dta", replace

export excel using "./Data_cleaning/Output/actual_cps.xlsx", sheet("Overall") sheetreplace firstrow(varlab)

