********************************************************************************
******* Program to import and clean CBO fiscal feedback excel file *******
********************************************************************************

* Start from an empty session and work relative to the project root held in
* the global macro project_code.
clear all

cd "${project_code}"

****************** Load the pre-processed Excel workbook ************************

* The first few rows of the sheet are blank, so the import begins at row 5.
* NOTE: the cell range is hard-coded. The original author flags that it
* affects report_year, so it must be revisited whenever the workbook changes.
local cbo_workbook "dta/stata_import_file.xlsx"
local cbo_cells    "A5:AI3581"

import excel using "`cbo_workbook'", sheet("CBO Data") ///
	cellrange(`cbo_cells') firstrow case(lower)

***** Remove extraneous columns and rows left over from the spreadsheet layout
* Variable e and the range t through ai are blank columns with nothing in them.
drop e t-ai

* Keep only rows that carry a description (the rest are blank spreadsheet rows).
keep if description != ""

**** Convert the t+0, t+1, t+2, etc. variables into numeric variables ****
destring t*, replace

**** Keep only rows that have a report date ****
keep if report_year != .

********************************************************************************
*** Merge potential GDP data into dataset **************

* Many rows in memory share one report_year; dta/pgdp.dta is expected to hold
* one row per report_year (m:1 errors out if it does not).
* keep(master match) stops years that exist ONLY in pgdp.dta from being
* appended as new observations with an empty description and missing
* t-variables, which would undo the blank-row cleanup done before this point.
* nogenerate replaces the separate "drop _merge" step.
merge m:1 report_year using "dta/pgdp.dta", keep(master match) nogenerate

********************************************************************************
*** Convert description names into more helpful var_names **************

* descript_var stays "" for any description that matches none of the rules
* below. The rules are applied in order, so a later rule overwrites an
* earlier one when both match the same row.
gen descript_var = ""

**** Trimmed copy of description (leading and trailing spaces removed) ****
* All matching below uses des_temp so that leading whitespace in the
* spreadsheet cannot defeat the "starts with Change" position tests.
gen des_temp = strtrim(description)

/*
* High-level categories: Economic, Technical, Legislative/Policy.
*/
replace descript_var = "econ_total" if inlist(des_temp, "Economic", "Economic Changes")
replace descript_var = "tech_total" if inlist(des_temp, "Technical", "Technical Changes")
replace descript_var = "leg_total"  if inlist(des_temp, "Policy", "Legislative", ///
	"Legislative Changes", "Total Legislative")


/*
* Revenue, Outlay, or Net Interest
*/

* Revenue: any label containing "Revenues", or a label that starts with
* "Change" and contains "Revenue", or one of the space-less variants.
* Parentheses spell out the precedence Stata already applies (& before |).
replace descript_var = "rev_total" if strpos(des_temp, "Revenues") > 0 ///
	| (strpos(des_temp, "Revenue") > 0 & strpos(des_temp, "Change") == 1) ///
	| inlist(des_temp, "ChangestoRevenueProjections", "ChangesinRevenues")

* Outlays: a label that starts with "Change" and contains "Outlays", or an
* exact match on one of the listed labels.
replace descript_var = "out_total" if (strpos(des_temp, "Outlays") > 0 & strpos(des_temp, "Change") == 1) ///
	| inlist(des_temp, "Outlays", "ChangestoOutlayProjections")

* Net interest: exact matches only, including the spelling variants
* ("Netinterestoutlays", "Net interes") listed in the original rule.
replace descript_var = "int_total" if inlist(des_temp, "Interest", "Net interest", ///
	"Net interest outlays", "Netinterestoutlays", "Net Interest", "Net interes")
		

* Write the cleaned dataset to disk, overwriting any existing copy.
local cleaned_dta "temp/complete_dataset.dta"
save "`cleaned_dta'", replace

* Finish with the working directory set to the project's do/ folder.
cd "${project_code}/do/"

