
/***********************************************************************************************
This do file compiles the final CCD dataset;
***********************************************************************************************/

/* Session setup. From here on every command is terminated by a semicolon.
   Closes any open log, drops all user-defined programs, clears the data in
   memory and any stored estimates, and defines the folder globals used by
   the rest of the script. */
# delimit;
capture log close;
/* _all is the keyword that drops every program in memory. The bare word
   "all" would only try to drop a program literally named all. */
capture program drop _all;
clear;
set more off;
set mem 500M;
estimates clear;

/* Folder locations: raw inputs, final outputs, and helper do-files. */
global data "C:/Users/nschwart/Desktop/nclb_final/ccd_folder/raw_data";
global final_data "C:/Users/nschwart/Desktop/nclb_final/ccd_folder/final_data";
global do_files "C:/Users/nschwart/Desktop/nclb_final/ccd_folder/do_files";

/*Generate economic dataset for merging*/
/* Keeps the state-by-fiscal-year indicators and prefixes each one with st_
   so the names are distinct when merged on state and fiscalyear later in
   this file. foreach replaces the obsolete "for any" prefix command. */
use "$data/econind_expend.dta", clear;
keep fiscalyear state unemprate povpct hhinc emppop frac_enroll pct_black pct_hisp;
foreach econ_var in unemprate povpct hhinc emppop frac_enroll pct_black pct_hisp {;
  rename `econ_var' st_`econ_var';
};
save "$data/clean_econind_expend.dta", replace;

/*Generate CCD school-level covariate dataset for merging*/
/* NOTE(review): presumably this helper writes CCD_district_cov.dta and
   CCD_school_cov.dta, which are merged and erased further down - confirm. */
do "$do_files/ccd_school_append.do";

/******************Generate final dataset********************/
/* For each annual file: keep school levels 01-03, keep a common set of
   variables, give the support-service items descriptive names, build
   tcursoth as the sum of the remaining support-service items, and save a
   <file>_temp.dta copy for appending.

   Layout differences between years:
     dist95, dist96 : tcursoth = v35+v40+v45+v50+v55+v85, no agchrt kept
     dist97         : tcursoth = v40+v45+v85+v90,         no agchrt kept
     dist98-dist08  : tcursoth = v40+v45+v85+v90,         agchrt kept     */
local core_vars ccdnf leaid fipst name stname stabbr schlev year totalrev tfedrev
  tstrev tlocrev totalexp tcurinst tcurssvc tcuroth tnonelse tcapout tcurelsc v33
  e17 e07 e08 e09 z33 v10;

foreach src in dist95 dist96 dist97 dist98 dist99 dist00 dist01 dist02 dist03 dist04 dist05 dist06 dist07 dist08 {;

  use "$data/`src'.dta", clear;
  keep if inlist(schlev, "01", "02", "03");

  /* Items that are summed into tcursoth for this file layout. */
  if inlist("`src'", "dist95", "dist96") {;
    local other_parts v35 v40 v45 v50 v55 v85;
  };
  else {;
    local other_parts v40 v45 v85 v90;
  };

  /* agchrt is only kept from dist98 onward. */
  local charter_var = cond(inlist("`src'", "dist95", "dist96", "dist97"), "", "agchrt");

  keep `core_vars' `charter_var' `other_parts';

  rename e17 tcurspup;
  rename e07 tcurssta;
  rename e08 tcursgen;
  rename e09 tcurssch;

  /* Turn "a b c" into "a+b+c" so the items are added left to right. */
  local other_sum : subinstr local other_parts " " "+", all;
  gen tcursoth = `other_sum';
  drop `other_parts';

  rename v33 enrollment;
  /* force turns non-numeric district ids into missing. */
  destring leaid, gen(leaid_nostr) force;
  destring fipst, replace;
  save "$data/`src'_temp.dta", replace;

};


/* Stack the per-year temp files into one panel, starting from dist95.
   clear makes the load safe even when unsaved data are in memory. Each
   later temp file is erased once appended. dist95_temp.dta is left on disk
   here and is erased at the end of the script. */
use "$data/dist95_temp.dta", clear;
foreach f in dist96 dist97 dist98 dist99 dist00 dist01 dist02 dist03 dist04 dist05 dist06 dist07 dist08 {;
  append using "$data/`f'_temp.dta";
  erase "$data/`f'_temp.dta";
};


/* Rescale every money variable to <name>_2009 = value*(214.537/index)/1000,
   where index is chosen by the two-digit string year code. Year codes that
   are not listed leave the result missing.
   NOTE(review): the index values look like annual CPI figures with 214.537
   as the 2009 base - confirm the source series. */
local yr_codes 92 93 94 95 96 97 98 99 00 01 02 03 04 05 06 07 08;
local idx_vals 140.3 144.5 148.2 152.4 156.9 160.5 163.0 166.6 172.2 177.1 179.9 184.0 188.9 195.3 201.6 207.342 215.303;
local n_codes : word count `yr_codes';

foreach money_var in totalrev tfedrev tstrev tlocrev totalexp tcurinst tcurssvc tcuroth tnonelse tcapout tcurelsc
 tcurspup tcurssta tcursgen tcurssch tcursoth z33 v10 {;
  gen `money_var'_2009=.;
  forval k = 1/`n_codes' {;
    local yy : word `k' of `yr_codes';
    local idx : word `k' of `idx_vals';
    replace `money_var'_2009 = `money_var'*(214.537/`idx')/1000 if year=="`yy'";
  };
};

/* Attach descriptive labels to the rescaled variables. Each list element is
   "<stub> <label text>"; the first word is the variable stub and the rest is
   the text placed before the shared "/1000 (2009 Dollars)" suffix. */
foreach spec in
  "totalrev Total Revenues"
  "tstrev Revenues From State"
  "tfedrev Revenues From Fed"
  "tlocrev Local Revenues"
  "totalexp Total Expenditures"
  "tcurinst Instructional Expenditures"
  "tcurssvc Support Services Expenditures"
  "tcuroth Other Elem/Sec Expenditures"
  "tnonelse Other Non-Elem/Sec Expenditures"
  "tcapout Capital Expenditures"
  "tcurelsc Total Elem/Sec Expenditures"
  "tcurspup Pupil Support"
  "tcurssta Instructional Staff Support"
  "tcursgen Gen Admin Support"
  "tcurssch School Admin Support"
  "tcursoth Other Support Services" {;
  gettoken stub label_text : spec;
  local label_text = trim("`label_text'");
  label variable `stub'_2009 "`label_text'/1000 (2009 Dollars)";
};

/* Combined categories: general plus school administration, and state plus
   local revenue. */
gen tadminsup_2009 = tcursgen_2009 + tcurssch_2009;
label variable tadminsup_2009 "All Admin Support/1000 (2009 Dollars)";
gen stlocrev_2009 = tstrev_2009 + tlocrev_2009;
label variable stlocrev_2009 "State and Local Revenues/1000 (2009 Dollars)";

/* For each finance stub create <stub>pp (the _2009 value divided by
   enrollment) and log<stub>pp (its log). Zero enrollment gives a missing
   per-pupil value. */
foreach stub in totalexp tnonelse tcapout tcurelsc tcurinst tcurssvc tcuroth tfedrev tstrev tlocrev tcurspup tcurssta tadminsup tcursoth stlocrev {;
  local pp_name `stub'pp;
  gen `pp_name' = `stub'_2009/enrollment;
  label variable `pp_name' "Per-Pupil `stub' in Thousands (2009 dollars)";
  gen log`pp_name' = log(`pp_name');
};

/* Convert the two-digit string year code into a four-digit numeric
   fiscalyear: codes 92-99 map to 1992-1999 and 00-08 map to 2000-2008.
   Any other code leaves fiscalyear missing. The string year is then
   dropped. */
gen fiscalyear=.;
foreach yy in 92 93 94 95 96 97 98 99 {;
  replace fiscalyear=19`yy' if year=="`yy'";
};
foreach yy in 00 01 02 03 04 05 06 07 08 {;
  replace fiscalyear=20`yy' if year=="`yy'";
};
drop year;

/* Build the string variable state from the numeric fipst code. Each list
   element is "<code> <state name>". Codes that are not listed keep an
   empty state. */
gen state="";
foreach fips_pair in
  "1 Alabama" "2 Alaska" "4 Arizona" "5 Arkansas" "6 California"
  "8 Colorado" "9 Connecticut" "10 Delaware" "11 Washington D.C." "12 Florida"
  "13 Georgia" "15 Hawaii" "16 Idaho" "17 Illinois" "18 Indiana"
  "19 Iowa" "20 Kansas" "21 Kentucky" "22 Louisiana" "23 Maine"
  "24 Maryland" "25 Massachusetts" "26 Michigan" "27 Minnesota" "28 Mississippi"
  "29 Missouri" "30 Montana" "31 Nebraska" "32 Nevada" "33 New Hampshire"
  "34 New Jersey" "35 New Mexico" "36 New York" "37 North Carolina" "38 North Dakota"
  "39 Ohio" "40 Oklahoma" "41 Oregon" "42 Pennsylvania" "44 Rhode Island"
  "45 South Carolina" "46 South Dakota" "47 Tennessee" "48 Texas" "49 Utah"
  "50 Vermont" "51 Virginia" "53 Washington" "54 West Virginia" "55 Wisconsin"
  "56 Wyoming" {;
  /* First word is the code, the remainder is the name. */
  gettoken fips_code state_name : fips_pair;
  local state_name = trim("`state_name'");
  replace state="`state_name'" if fipst==`fips_code';
};

/* Remove rows with no numeric district id. */
drop if leaid_nostr==.;

/*Use Tom's code to drop outliers*/
/* Sample restrictions: drop FIPS 11 and 15, drop zero or negative
   enrollment, drop agchrt code "1", and keep only school level "03". */
drop if inlist(fipst, 11, 15);
drop if enrollment<=0;
drop if agchrt=="1";
/*drop if agchrt=="2";*/
keep if schlev=="03";

/* Per-pupil revenue outliers: within each state, drop rows above 1.5 times
   the 95th percentile or below 0.5 times the 5th percentile. Both cutoffs
   are computed before any row is removed. */
gen trevpup = tfedrevpp+tstrevpp+tlocrevpp;
egen trev_p95 = pctile(trevpup), p(95) by(state);
egen trev_p5 = pctile(trevpup), p(5) by(state);
drop if trevpup>1.5*trev_p95 | trevpup<0.5*trev_p5;

/*Merge in consequential accountability data*/
/* State-level merge. Hawaii and Washington D.C. rows are removed
   afterwards. mmerge is a user-written command and must be installed. */
mmerge state using "$data/ca_data.dta";
drop if inlist(state, "Hawaii", "Washington D.C.");
drop _merge;

/*Merge in covariates from school-level CCD*/
/* One-to-one merge on district id and year (the using file calls the year
   variable year). Only rows found in both files are kept, and the
   intermediate covariate files are deleted afterwards. */
mmerge leaid_nostr fiscalyear using "$data/CCD_district_cov.dta", umatch(leaid_nostr year) type(1:1);
tab _merge;
drop if inlist(_merge, 1, 2);
drop _merge;
foreach scratch in CCD_district_cov CCD_school_cov {;
  erase "$data/`scratch'.dta";
};

/*Salary info*/
/* Per-FTE amounts from the rescaled z33 and v10 variables. fte of zero
   yields a missing value. */
gen salary_2009 = z33_2009/fte;
label variable salary_2009 "Salary per FTE/1000 (2009 Dollars)";
gen salplusben_2009 = (z33_2009+v10_2009)/fte;
label variable salplusben_2009 "Salary plus benefits per FTE/1000 (2009 Dollars)";

/*Merge in 2000 poverty data from school district demographic database*/
/* The using file identifies districts by leaid. Only matched rows are
   kept. */
mmerge leaid_nostr using "$data/poverty_2000.dta", umatch(leaid);
drop if inlist(_merge, 1, 2);
drop _merge;

/*Replace missing covariate data with zeroes*/
/* For each covariate, <name>_flag marks rows where the value was missing,
   and the missing value itself is then set to zero. */
foreach cov in pct_bl_hisp pov_rate_00 {;
  gen `cov'_flag = (`cov'==.);
  replace `cov'=0 if `cov'==.;
};

/* Enrollment in hundreds, then squared, cubed and fourth-power terms
   (_sq, _cu, _qu) of each covariate, plus the interaction of the two
   share variables. */
gen dist_enroll_hun = enrollment/100;
local power_names sq cu qu;
foreach base in dist_enroll_hun pct_bl_hisp pov_rate_00 {;
  forval power = 2/4 {;
    local pos = `power' - 1;
    local sfx : word `pos' of `power_names';
    gen `base'_`sfx' = `base'^`power';
  };
};
gen pct_bl_hisp_pov = pct_bl_hisp*pov_rate_00;

/*FTE and salary outliers*/
/* pup_tch is blanked where fte is zero. Then, for each of the three
   measures, values above 1.5 times the within-state 95th percentile or
   below 0.5 times the within-state 5th percentile are set to missing. The
   rows themselves are kept. */
replace pup_tch=. if fte==0;

local trim_targets pup_tch salary_2009 salplusben_2009;
local trim_stubs puptch sal salben;
forval k = 1/3 {;
  local target : word `k' of `trim_targets';
  local stub : word `k' of `trim_stubs';
  egen `stub'_p95 = pctile(`target'), p(95) by(state);
  egen `stub'_p5 = pctile(`target'), p(5) by(state);
  replace `target'=. if `target'>1.5*`stub'_p95;
  replace `target'=. if `target'<0.5*`stub'_p5;
};

/*Merge in state-level covariates for falsification tests*/
/* Rows that exist only in the state-level file are dropped. _merge is left
   in the data. */
mmerge state fiscalyear using "$data/clean_econind_expend.dta";
drop if _merge==2;

/*Create variable that divides sample into within-state poverty quartiles*/
/* The number of state groups is read from the data instead of being fixed
   at 49, so the loop covers exactly the groups that exist. Rows with an
   empty state get no group and keep a missing pov_quart. Temporary
   variables are used for the group id and the per-state quartile.
   NOTE(review): pov_rate_00 values that were missing have been set to zero
   earlier in this file, so they enter the quartile cut points as zeros -
   confirm this is intended. */
tempvar state_id state_quart;
egen `state_id' = group(state);
summarize `state_id', meanonly;
local n_states = cond(r(N)>0, r(max), 0);
gen pov_quart=.;
forval n=1/`n_states' {;
  xtile `state_quart' = pov_rate_00 if `state_id'==`n', nq(4);
  replace pov_quart = `state_quart' if `state_id'==`n';
  drop `state_quart';
};
drop `state_id';
label variable pov_quart "4 is poorest group";

/*Generate regression variables*/
/* TREAT is 1 when yearca is after 2001 or missing. year counts from 1989.
   nclb is 1 from fiscal year 2003 on. yr_since_nclb is fiscalyear minus
   2002, floored at zero. */
gen TREAT = (yearca>2001) | (yearca==.);
gen year = fiscalyear - 1989;
gen nclb = (fiscalyear >= 2003);
gen yr_since_nclb = cond(fiscalyear<2002, 0, fiscalyear - 2002);
gen TREAT_year = TREAT*year;
gen TREAT_nclb = TREAT*nclb;
gen TREAT_yr_since = TREAT*yr_since_nclb;

/* Indicator for yearca equal to 1998, 1999, 2000 or 2001. */
gen exca9801 = inlist(yearca, 1998, 1999, 2000, 2001);

/* NEWTREAT is yearca minus 1992, or 11 when yearca is missing. */
gen NEWTREAT = cond(yearca==., 11, yearca-1992);
gen NEWTREAT_year = NEWTREAT*year;
gen NEWTREAT_nclb = NEWTREAT*nclb;
gen NEWTREAT_yr_since = NEWTREAT*yr_since_nclb;

gen year_sq = year^2;
gen NEWTREAT_year_sq = NEWTREAT*year_sq;

/* Sample flag that is 0 for fiscal years 1995 and 1996 and 1 otherwise. */
gen omityears_sample = !inlist(fiscalyear, 1995, 1996);

/* Sample flag that is 1 for the listed fiscal years in every state except
   the twelve named below. The state list is split across two inlist()
   calls because string inlist() takes at most ten comparison values. */
gen naep_sample = !(inlist(state, "Alaska", "Colorado", "Delaware", "Florida", "Illinois", "Kansas")
  | inlist(state, "New Hampshire", "New Jersey", "Pennsylvania", "South Dakota", "Washington", "Wisconsin"))
  & inlist(fiscalyear, 1992, 1996, 2000, 2003, 2005, 2007);

/* Write the final dataset and remove the last remaining temp file. */
save "$final_data/dist_expend.dta", replace;
erase "$data/dist95_temp.dta";

