***********************
* Early Impact of ACA *
* Amanda Kowalski     *
*                     *
* 13 October 2014     *
***********************

*********************************************
* PURPOSE: Allocate quarterly data by state *
*		i.   Merge data             *
*		ii.  Allocation algorithm   *
*********************************************

*Session setup: empty the data in memory, store new variables as double,
*and turn off --more-- paging (the permanently option keeps that setting across sessions)
clear
set matsize 2000
set type double
set more off, permanently

*Open a named log, first closing any log of the same name left open by an earlier run
local tag "state_alloc"
capture log close `tag'
*Create the log directory when it is absent; capture swallows the error raised when it already exists
capture mkdir "logs"
log using "logs/`tag'.log", replace name(`tag')

*****************
* i. Merge data *
*****************

*Build the annual panel: every 2013 record is duplicated and the copy relabelled
*as 2014, then the 2008-2012 annual files are stacked underneath
use "data/intermediate/2013_data.dta", clear
expand 2, generate(copy2014)
replace yr = 2014 if copy2014
drop copy2014

foreach year of numlist 2008/2012 {
	append using "data/intermediate/`year'_data.dta"
}

*Hold the stacked panel in a temporary file for the merge that follows
tempfile annual_data
sort id yr
save `annual_data'

*Attach the annual figures to every Schedule T record
use "data/intermediate/schedulet_data.dta", clear
merge m:1 id state yr name group domicile using `annual_data'

*Every unmatched record must be an "MRQ" (most recent quarter) row;
*all "MRQ" rows are then removed
assert yrqtr == "MRQ" if _merge != 3
drop if yrqtr == "MRQ"
drop _merge

*Attach the imputed quarterly figures; no record may come from the quarterly file alone
merge m:1 id yrqtr yr qtr name group domicile ///
	using "data/intermediate/quarterly_data_nd_imputed.dta"
assert _merge != 2

*State-level measures that are cleaned and inspected below
local st_measures enr_st mmonths_st premium_st cost_st

*Build the two cleaning conditions: master-only records whose state-level
*measures are all zero, and master-only records whose measures are all missing
local all_zero "_merge == 1"
local all_miss "_merge == 1"
foreach v of local st_measures {
	local all_zero "`all_zero' & `v' == 0"
	local all_miss "`all_miss' & `v' == ."
}
drop if `all_zero'
drop if `all_miss'

*Gauge how much the remaining non-merges matter
tabulate _merge
summarize `st_measures' if _merge != 3
summarize `st_measures'

*Retain matched records only
drop if _merge != 3
drop _merge

compress
save "data/intermediate/data_merged.dta", replace

****************************
* ii. Allocation algorithm *
****************************

*Measures to be allocated across states
local alloc_vars enr mmonths premium cost

*Replace missings with 0s
*missing() is used instead of == . so that extended missing values (.a-.z) are zeroed as well
foreach var of local alloc_vars {
	replace `var'_st = 0 if missing(`var'_st)
}

*Allocate using annual data: share = state value / total over all rows of the same id-yq
*total() is the documented name of the egen function formerly called sum()
foreach var of local alloc_vars {
	bysort id yq : egen `var'_sttot = total(`var'_st)
	gen `var'_sh = `var'_st / `var'_sttot
}

*When missing (e.g. the id-yq total is 0), allocate using Schedule T data:
*share = prem_t / total prem_t over all rows of the same id-yq
bysort id yq : egen prem_t_tot = total(prem_t)
foreach var of local alloc_vars {
	replace `var'_sh = prem_t / prem_t_tot if missing(`var'_sh)
}

*When still missing, assign to state of domicile
*Only the domicile row receives a share of 1; rows for other states keep a
*missing share and are listed by the tabulations below
foreach var of local alloc_vars {
	replace `var'_sh = 1 if missing(`var'_sh) & state == domicile
}

*List the ids and names that still have at least one missing share
tab id if missing(enr_sh) | missing(mmonths_sh) | missing(premium_sh) | missing(cost_sh)
tab name if missing(enr_sh) | missing(mmonths_sh) | missing(premium_sh) | missing(cost_sh)

*Build the state-allocated variables: each measure, and its imputed (_imp)
*counterpart, is multiplied by that measure's state share
local sources enr mmonths premium cost
local targets e mm p c
forvalues i = 1/4 {
	local src : word `i' of `sources'
	local tgt : word `i' of `targets'
	generate `tgt' = `src' * `src'_sh
	generate `tgt'_imp = `src'_imp * `src'_sh
}

*Keep, sort by, and order on one shared variable list, then write the result
local out_vars id yrqtr state yq yr qtr name group domicile e e_imp mm mm_imp p p_imp c c_imp
keep `out_vars'
sort `out_vars'
order `out_vars'
compress
save "data/intermediate/data_allocated.dta", replace

*Close the named log opened at the top of the script
log close `tag'
