// TC: we no longer create surplus_exp here, but keep it from leg_regression_final_full

cd "$project_code"

// 1) Prepare the r-minus-g dataset: trim it to the columns needed, add two
//    derived ratios, and stash the result in temp/ so it can be merged onto
//    the regression data in step 3.
use "dta/historicaldebtevolution_cleaned.dta", clear

// Units, per the original author's note:
//   nominal: debt, gdp, g_nom
//   real:    r, g
keep year r g rminusg debt g_nom gdp inflation interest ///
	primarysurplus_gdp

generate b   = debt / gdp              // debt divided by gdp
generate rho = rminusg / (1 + g_nom)   // rminusg scaled by (1 + g_nom)

save "temp/temp_historicaldebtevolution_cleaned_to_merge.dta", replace

// 2) Load cleaned regression dataset
use "dta/leg_regression_final_full.dta", clear

// surplus_exp, surplus_exp_ewtd and surplus_exp_cur are taken as delivered by
// leg_regression_final_full (TC: we no longer create surplus_exp here).
// The original list named surplus_exp twice; the duplicate is removed.
keep report_year report_half outgap surplus surplus_exp surplus_ewtd revenue outlays ///
	lag_outgap_pgdp surplus_exp_ewtd surplus_exp_cur
// Previously these were dropped and re-created from scratch later:
// drop surplus_exp surplus_exp_cur

// Merge key: the r-minus-g file is keyed on year.
g year = report_year

sort report_year report_half

// 3) Merge with the r-minus-g dataset we cleaned and saved in part 1)
// m:1 instead of m:m: the master can hold several rows per year (one per
// report_half), while the using file is expected to hold a single row per
// year (TODO confirm against historicaldebtevolution_cleaned.dta).
// When that holds, the result is identical to the old m:m merge; when it does
// not, Stata now stops with an error instead of pairing rows sequentially
// within each year, which is what m:m silently does.
merge m:1 year using "temp/temp_historicaldebtevolution_cleaned_to_merge.dta"
sort year report_half
keep if year >= 1980
drop _merge
// Rows that came only from the using file have no report_year; fill it from
// the merge key before the key is dropped.
replace report_year = year if report_year == .
drop year


// 4) Manually re-create variables
/* The purpose of script 3_create_regress_dataset.do is to clean complete_dataset.dta
and save it as leg_regression_final.dta. Since we want to re-create some of these
variables manually, we load an intermediate version of these two datasets saved in
script #3. */
// 1:m instead of m:m: this assumes the data in memory has at most one row per
// (report_year, report_half) -- TODO confirm for leg_regression_final_full --
// while temp_complete_dataset may hold several rows per report (the
// budget_line filter below relies on that file carrying a budget_line column).
// If the assumption holds the result is identical to the old m:m merge; if it
// does not, Stata stops with an error rather than pairing rows sequentially
// within each key, which is what m:m silently does.
merge 1:m report_year report_half using "temp/temp_complete_dataset.dta"
drop _merge

keep if report_year>=1983
keep if budget_line=="def"
sort report_year report_half


// 5) Merge with PGDP data
merge m:1 report_year using "dta/pgdp.dta"
drop _merge
generate year = report_year

sort year // IMPORTANT (flagged as such by the original author)

// Build a table with one row per report_year holding leads of pgdp
// (0 to 5 years ahead) and lags of pgdp and b (1 and 2 years back), then
// merge it back onto the main data.
// Historical note from the original author: an earlier approach stopped
// working after a Stata update, which is why tsset is used here. With the
// data tsset on report_year, the F. and L. operators look up values by
// report_year rather than by row position.
// TC to DY: use this code to create lags/leads of other vars
// (this code is better than lines 242-268 in script #3, should supersede)
preserve

	keep report_year b pgdp

	// One value per year: take the within-year maximum of each series.
	bysort report_year: egen pgdp_unique = max(pgdp)
	bysort report_year: egen b_unique = max(b)
	drop pgdp b
	rename (pgdp_unique b_unique) (pgdp b)

	// Rows within a year are now identical, so this leaves one row per year.
	duplicates drop
	tsset report_year

	// Leads of pgdp; horizon 0 is the contemporaneous value.
	forvalues k = 0/5 {
		generate pgdp_lead_`k' = F`k'.pgdp
	}

	// Lags of pgdp and of b.
	forvalues k = 1/2 {
		generate pgdp_lag_`k' = L`k'.pgdp
		generate b_lag_`k' = L`k'.b
	}

	save "temp/leads_lags.dta", replace

restore

// Attach the per-year leads and lags to every row of the main data.
merge m:1 report_year using "temp/leads_lags.dta"
drop _merge


// 6) Create variables of interest
// (this code is better than lines 242-307 in script #3, should supersede)
// For each horizon N = 0..5 this builds, in this order:
//   surplus_tN           minus tN
//   surplus_tN_wtd       w_tN times surplus_tN
//   surplus_tN_wtd_norm  surplus_tN_wtd divided by pgdp_lead_N
//   surplus_tN_norm      surplus_tN divided by pgdp_lead_N
forvalues h = 0/5 {
	generate surplus_t`h' = -t`h'
	generate surplus_t`h'_wtd = w_t`h' * surplus_t`h'
	generate surplus_t`h'_wtd_norm = surplus_t`h'_wtd / pgdp_lead_`h'
	generate surplus_t`h'_norm = surplus_t`h' / pgdp_lead_`h'
}

// Disabled: surplus_exp_cur and surplus_exp are no longer built here; they are
// kept from leg_regression_final_full instead.
// g surplus_exp_cur =  surplus_t0_wtd_norm + surplus_t1_wtd_norm + surplus_t2_wtd_norm +surplus_t3_wtd_norm + surplus_t4_wtd_norm + surplus_t5_wtd_norm

sort report_year report_half
// g surplus_exp = surplus_exp_cur[_n-1]

// Remove raw inputs that are no longer needed. Note that the wildcard in this
// list removes every variable whose name begins with t.
drop change_type budget_line t* disc a0 factor_w augshift

save "dta/merged_budget_rminusg_data.dta", replace

// Leave the working directory in the do/ folder.
cd "$project_code/do/"

