********************************************************************************
* Program:  Collect Census Projections Data
* Notes:    THIS MUST BE RUN ON HAL
* Date:     02/05/2021
********************************************************************************

* Load the Census projections file (it carries one pop_<age> column for each age 0-100)
import delimited "./Misc/Projections/Data/Census_proj_racegender.csv", clear

* Keep only the single-year-of-age columns for ages 25-64:
* first remove everything younger than 25, then everything older than 64
forvalues age = 0/24 {
	drop pop_`age'
}
forvalues age = 65/100 {
	drop pop_`age'
}

* Replace the total_pop that came with the CSV by the sum over ages 25-64.
* The running total is seeded with age 25, so the loop adds ages 26-64 only;
* starting the loop at 25 would count age 25 twice.
* Stored as double because a 40-age sum may exceed the largest integer a float
* holds exactly (16,777,216).
* NOTE(review): the later collapse keeps only the agegroup sums and its by()
* variables, so this row-level total is not carried forward; total_pop is
* regenerated after that collapse.
drop total_pop
gen double total_pop = pop_25
forvalues age = 26/64 {
	replace total_pop = total_pop + pop_`age'
}
* Build racegroup from the race and origin codes:
*   racegroup = 1 if race is white only and ethnicity is not hispanic
*   racegroup = 2 if race is black no matter the ethnicity
*   racegroup = 3 if hispanic and otherwise in racegroup 1 or 4
*   racegroup = 4 if race is asian, pacific islander, alaska native, two+ races
* Rows coded 0 on origin, sex or race are removed first
* (presumably the all-groups total rows - confirm against the Census file layout).
drop if origin == 0
drop if sex == 0
drop if race == 0
gen racegroup = 1 if race == 1
replace racegroup = 2 if race == 2
replace racegroup = 4 if inlist(race, 3, 4, 5, 6)
* Switch to hispanic only if white or other (racegroup 4) and hispanic.
* Black rows stay in racegroup 2 no matter the ethnicity: the earlier condition
* also matched racegroup == 2, which moved hispanic black rows into racegroup 3
* and contradicted the rule stated above.
replace racegroup = 3 if origin == 2 & (racegroup == 1 | racegroup == 4)

* Race codes outside 1-6 never received a group and are dropped here
drop race origin
drop if racegroup == .

* Gender groups: the sex variable is used as-is under the name gendergroup
rename sex gendergroup

* Create four ten-year age groups (25-34, 35-44, 45-54, 55-64).
* For each group the sum expression pop_<lo> + ... + pop_<lo+9> is assembled in a
* local and evaluated by a single gen, so the result is computed in one pass.
forvalues g = 1/4 {
	local lo = 15 + `g'*10
	local expr pop_`lo'
	forvalues step = 1/9 {
		local age = `lo' + `step'
		local expr `expr' + pop_`age'
	}
	gen agegroup`g' = `expr'
}

* The single-year columns are no longer needed once the groups exist
drop pop_*

* Collapse age groups by race and gender groups by year.
* (This comment used to start with "#". Stata only accepts "*", "//" and
* "/* */" as comment markers, so that line stopped the do-file with an error.)
collapse (sum) agegroup1 agegroup2 agegroup3 agegroup4, by(year racegroup gendergroup)

* Build the yearly total across every race/gender/age group, then attach it to
* each race-by-gender row of the same year.
preserve
collapse (sum) agegroup1 agegroup2 agegroup3 agegroup4, by(year)
* double: a yearly total may exceed the largest integer a float stores exactly
gen double total_pop = agegroup1 + agegroup2 + agegroup3 + agegroup4
* Keep only the merge key and the total so the using file cannot collide with
* the agegroup columns of the master data
keep year total_pop
tempfile totpop
save `totpop'
restore
merge m:1 year using `totpop', nogen

* Go from one column per age group to one row per year x gender x race x age group.
* After the reshape, "agegroup" holds the population count and "holder" holds
* the age-group index (1-4).
reshape long agegroup, i(year gendergroup racegroup) j(holder)

* Calculate group shares
* Give the count, the index and the yearly total their projection-side names
rename agegroup proj_pop
rename holder agegroup
rename total_pop proj_tot
gen proj_share = proj_pop*100/proj_tot

* newid numbers the gender x age-group cells:
* 1-4 are age groups 1-4 of gendergroup 1, 5-8 are age groups 1-4 of gendergroup 2.
* Any other combination is left missing.
gen newid = 4*(gendergroup - 1) + agegroup if inlist(gendergroup, 1, 2) & inlist(agegroup, 1, 2, 3, 4)

save "./Misc/Projections/Data/pop.dta", replace


* Pull in calculated data from our code and put it in the same form as the projection data.
* The position of a race name in this list is its racegroup code
* (White 1, Black 2, Hispanic 3, Other 4).
local racenames White Black Hispanic Other
foreach r in "Black" "White" "Hispanic" "Other" {
	use "./Data_cleaning/Data/Stata_dta/agegenderraceshare_`r'.dta", clear
	keep date_ym tot_pop share_pop newid

	* Same time key as the projection data, restricted to 2016 onward
	rename date_ym year
	keep if year >= 2016

	* Race code looked up from the file's race name
	local code : list posof "`r'" in racenames
	gen racegroup = `code'

	* Undo the newid numbering: 1-4 belong to gendergroup 1, 5-8 to gendergroup 2
	gen gendergroup = 1 if inlist(newid, 1, 2, 3, 4)
	replace gendergroup = 2 if inlist(newid, 5, 6, 7, 8)

	* Age group a corresponds to newid a (gendergroup 1) and newid a + 4 (gendergroup 2)
	gen agegroup = .
	forvalues a = 1/4 {
		replace agegroup = `a' if newid == `a' | newid == `a' + 4
	}

	* One tempfile per race, named <race>pop, for the append step that follows
	tempfile `r'pop
	save ``r'pop'
}

* Stack the four per-race files: White first, then Black, Hispanic and Other
use `Whitepop', clear
append using `Blackpop' `Hispanicpop' `Otherpop'

* Group population implied by the total and the group's percentage share,
* then give the calculated-side columns their calc_ names
gen calc_pop = tot_pop*share_pop/100
rename tot_pop calc_tot
rename share_pop calc_share

save "./Misc/Projections/Data/orig_sharepop.dta", replace

* Merge projection data with the data calculated by our main calculations.
* Rows from either side are kept; the merge indicator is not needed afterwards.
merge 1:1 gendergroup agegroup racegroup year using "./Misc/Projections/Data/pop.dta", nogenerate

sort year gendergroup agegroup racegroup

* Calculate differences between projection data 2016-2019 as compared to our main calculations:
*   perc_dif  - gap in group population, as a percent of the projected value
*   share_dif - gap in group share
*   pop_dif   - gap in total population
gen perc_dif = 100*(proj_pop - calc_pop)/proj_pop
gen share_dif = proj_share - calc_share
gen pop_dif = proj_tot - calc_tot

* Back to the time-variable name used by the calculated data files
rename year date_ym

* Attach SE from the saved shares file, matching on period and group cell
* NOTE(review): the meaning of SE is not visible in this file - confirm
* against the program that writes SEpopsharesthrough19.dta
merge 1:1 date_ym agegroup racegroup gendergroup using "./Misc/Projections/Output/SEpopsharesthrough19.dta", nogenerate

* Projected group population scaled by (1 - SE)
gen proj_pop_NSE = (1 - SE)*proj_pop

* collapse calc_tot (sum) proj_pop_NSE proj_pop, by(date_ym)


* Save projection populations by age/gender/race group for each year by race

keep racegroup newid date_ym proj_pop_NSE

save "./Misc/Projections/Data/comp_sharepop.dta", replace




* Write one file per race group holding the adjusted projected populations for
* 2020 onward. The i-th name in the list is the file suffix for racegroup i.
local racenames White Black Hispanic Other
forvalues i = 1/4 {
	local race : word `i' of `racenames'

	use "./Misc/Projections/Data/comp_sharepop.dta", clear
	keep if racegroup == `i'
	keep date_ym proj_pop_NSE newid
	drop if date_ym < 2020
	rename proj_pop_NSE group_pop
	save "./Misc/Projections/Data/proj_adjustedsum_`race'.dta", replace
}
