#delimit;

* Start from a clean session: close any open log, then clear data, matrices, estimates, programs and macros;
capture log close;
clear;
clear matrix;
estimates clear;
program drop _all;
macro drop _all;
set mem 500M;
set more off;

***** SET DIRECTORY STRUCTURE *****;

* Raw inputs;
global data "/homes/nber/bajacob/nclb_final/naep_folder/raw_data/";
global data_update "/homes/nber/bajacob/nclb_final/naep_folder/raw_data/other_updates/";
global econ "/homes/nber/bajacob/nclb_final/naep_folder/raw_data/econind_data/";
global m09  "/homes/nber/bajacob/nclb_final/naep_folder/raw_data/math2009/";
/* NOTE(review): race_pared is the only path without the bajacob segment that every
   other global has. It is not referenced in this file - confirm the directory exists
   or correct the path. */
global race_pared "/homes/nber/nclb_final/naep_folder/raw_data/naep_race_pared/";

* Code;
global do_files "/homes/nber/bajacob/nclb_final/naep_folder/do_files/";

* Outputs;
global analysis "/homes/nber/bajacob/nclb_final/naep_folder/analysis/";
global tabs "/homes/nber/bajacob/nclb_final/naep_folder/tables/";
global figs "/homes/nber/bajacob/nclb_final/naep_folder/figures/";

* Run scope: each commented-out global below is an alternative setting for the active one next to it;
global outcomes "mean";
*global outcomes "mean pct10 pct25 pct50 pct75 pct90 pct_bsc pct_prof pct_adv";
global subjects "math";
*global subjects "math read";
global grades "4 8";
*global grades "4";
global groups "All";
*global groups "All Black White Hispanic";

* Full list of 51 jurisdiction codes (50 states plus DC);
global statecodes "AL AK AZ AR CA CO CT DE DC FL GA HI IL ID IA IN KS KY LA ME MD MA MI 
  MN MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA RI SC SD TN TX UT VA VT WA WV WI WY";

* NOTE(review): stw3g4m is a 32-code subset - its selection rule is not visible in this file;
global stw3g4m "AL AZ AR CA CT DC GA HI IA IN KY LA ME MD MA MI 
  MN MS MO NE NM NY NC ND RI SC TN TX UT VA WV WY";


/*Build the 2009 math files first (this step was added after the first draft)*/
do "${do_files}math2009.do";
clear;

*** LOAD DATA ***;

log using "nclb_final.log", replace;

use "${data}nclb_data.dta";

*** Stack the 2009 math records under the main file;
append using ${m09}math2009.dta;
* For 2009 keep only rows that are composite-scale or all-students. The subscale-by-subgroup scores are good, but we didn't get them for other years;
drop if year==2009 & !(scale=="Composite Scale" | group=="All Students");
replace type="Public" if year==2009;
tab year;

* Fill in state codes and other missing data from the 2007 rows of the same state;
* NOTE(review): the update option of mmerge is expected to fill only values that are missing in memory - confirm;
preserve;
keep if year==2007;
keep state statecode trend nclb03;
duplicates drop;
save sc.dta, replace;
restore;
mmerge state using sc.dta, t(n:1) update;
erase sc.dta;
drop _m;
* 2009 rows received the 2007 trend value, so move it forward two years;
replace trend=trend+2 if year==2009;
replace trendnclb03=year-2002 if year==2009;

*** Merge on 2009 math exclusion rates;
mmerge state year grade subject using ${m09}math2009_excl.dta, t(n:1) update;
* Tabulate rows still lacking an exclusion rate, by year;
gen mmexc=excl==.;
tab year mmexc;
drop mmexc _m;

* Give the all-students group a single spelling;
replace group="All" if inlist(group,"All students","All Students");

* Fill in missing type: science rows, subgroup rows and subscale rows are all set to Public;
replace type="Public" if subject=="Science" | group!="All" | scale!="Composite Scale";
tab type, m;

* Drop irrelevant sample: statecodes NP and NA, and anything that is not Public;
drop if statecode=="NP" | statecode=="NA" | type!="Public";


*********************************************************
**********CORRECTED ACCOUNTABILITY***********************
**********YEAR IDENTIFIES FALL OF AY*********************
*********************************************************;

gen yearca=.;
gen yearrc=.;
label variable yearca "year that state adopted consequential accountability";
label variable yearrc "alternative coding for consequential accountability - not used for most specs";

* yearca, grouped by year. Each state is assigned exactly once and states not listed stay missing;
replace yearca=1992 if statecode=="IL";
replace yearca=1993 if statecode=="WI";
replace yearca=1994 if statecode=="TX";
replace yearca=1995 if inlist(statecode,"IN","KS","KY");
replace yearca=1996 if inlist(statecode,"NC","NV","OK");
replace yearca=1997 if inlist(statecode,"AL","RI","WV");
replace yearca=1998 if inlist(statecode,"DE","MA","MI","NM","NY","VA");
replace yearca=1999 if inlist(statecode,"AR","CA","CT","FL","LA","MD","SC","VT");
replace yearca=2000 if inlist(statecode,"GA","OR","TN");
replace yearca=2001 if statecode=="AK";
* Disabled codings kept for reference;
*replace yearca=2003 if statecode=="ND";
*replace yearca=2003 if statecode=="UT";

* yearrc, grouped by year in the same way;
replace yearrc=1988 if statecode=="NJ";
replace yearrc=1990 if statecode=="CT";
replace yearrc=1991 if statecode=="SD";
replace yearrc=1994 if statecode=="MS";
replace yearrc=1995 if inlist(statecode,"IN","KS","NH","PA");
replace yearrc=1996 if inlist(statecode,"MN","TN");
replace yearrc=1997 if inlist(statecode,"DC","MO","MT","OH","WA");
replace yearrc=1999 if inlist(statecode,"AK","IA","ME","WY");
replace yearrc=2000 if statecode=="AZ";
replace yearrc=2001 if inlist(statecode,"CO","HI","NE");

* The years above identify the fall of the academic year. Add one to match the naep year variable, which is coded as the spring of the academic year;
replace yearca=yearca+1;
replace yearrc=yearrc+1;

* One more alternative consequential accountability measure that requires more than ratings to count as ca: NC, VA and WI are set to missing;
gen yearca2=yearca if !inlist(statecode,"NC","VA","WI");
label variable yearca2 "consequential accountability that requires more than ratings to count as ca";

*Create variables to designate years of data;

* noobspre02g and noobspre03g: number of distinct years before 2002 and before 2003 with a non-missing mean, within each type-group-subject-grade-state cell, set to 0 when the cell has none;
* nvals() is not a built-in egen function. It is expected to come from the user-written egenmore package;
foreach cut in 02 03 {;
     egen tmp=nvals(year) if year<20`cut'&mean!=., by(type group subject grade statecode);
     egen noobspre`cut'g=max(tmp), by(type group subject grade statecode);
     replace noobspre`cut'g=0 if noobspre`cut'g==.;
     drop tmp;
};

* obsYYYY: 1 if the cell has a non-missing mean in year YYYY, else 0;
* foreach replaces the out-of-date for prefix. The variables created are the same, only the log no longer echoes each generated command;
foreach yr in 1990 1992 1994 1996 1998 2000 2002 2005 2007 {;
     egen obs`yr'=max(year==`yr'&mean!=.), by(type group subject grade statecode);
};

* exca9901: 1 if yearca lies in 1999-2001 inclusive. A missing yearca gives 0 because missing fails the upper bound;
gen exca9901=yearca>=1999&yearca<=2001;

*More covariates (squares etc.) and use correct versions of race covariates;

*Baseline covs = excl, excl_sq, povpct, povpct_sq ;
*Resources = pup_tea, curexp ;
*Demo = ipct_fl ipct_blk ipct_his ipct_wht ipct_oth ; 

drop iblack_mexp ihisp_mexp iwhite_mexp;
rename exp_pup curexp;
rename pctpov povpct;
* Residual race share, floored at zero. It stays missing if any component share is missing;
gen ipct_oth=1-ipct_blk-ipct_his-ipct_wht;
replace ipct_oth=0 if ipct_oth<0;

*check new race vars;
* rsum treats missing as zero, so a row total of exactly 0 is used to flag rows with no race data (mm_race==1);
egen tmp=rsum(pct_white pct_black pct_hisp pct_asian);
sum tmp, det;
gen mm_race=tmp==0;
sum tmp if mm_race==0, det;
egen tmp2=rownonmiss(pct_white pct_black pct_hisp pct_asian);
tab tmp2 if mm_race==0;
sum tmp if tmp2<4 & mm_race==0, det;
  *these missing should be coded as 0s;
* foreach replaces the out-of-date for prefix. The two replaces per variable are the same as before and do not interact across variables;
foreach v of varlist pct_white pct_black pct_hisp pct_asian {;
    replace `v'=0 if `v'==.&mm_race==0;
    replace `v'=0 if mm_race==1;
};
sum pct_white pct_black pct_hisp pct_asian mm_race;
* Note that this drops every variable whose name starts with tmp, not only tmp and tmp2;
drop tmp*;

* Missing-indicator treatment: mm_X flags a missing X, then X itself is set to 0;
foreach v of varlist frac_pub frac_fulldayk_mavg frac_prek_mavg {;
    gen mm_`v'= `v'==.;
};
foreach v of varlist frac_pub frac_fulldayk_mavg frac_prek_mavg {;
    replace `v'=0 if `v'==.;
};


*Use most relevant CPI deflator to create ppexp in real terms - 2007 dollars;
* Each year in defl_years is paired by position with the CPI value in defl_cpi. The values are substituted as text into the same expression as before;
* NOTE(review): 207.342 is presumably the 2007 CPI level. Rows for 2007 and 2009 are not rescaled here - confirm that is intended for 2009;
sum curexp;
local defl_years 1990  1992  1994  1996  1998  2000  2002  2003  2005;
local defl_cpi   130.7 140.3 148.2 156.9 163.0 172.2 179.9 184.0 195.3;
local npairs : word count `defl_years';
forvalues i=1/`npairs' {;
    local yr  : word `i' of `defl_years';
    local cpi : word `i' of `defl_cpi';
    replace curexp=curexp*207.342/`cpi' if year==`yr';
};
sum curexp;

/*Add on state-level economic data -- this data is created in a separate do file that is called below
and is described in the main documentation.
preserve/restore keeps the dataset in memory intact while that do file runs.*/
preserve;
do ${econ}makeecon_ind.do;
restore;
*merge on lagged economic data;
* unemprate and povpct are dropped first because the code below uses them again after the merge, so they must come back from the merged file;
* unmatched(master) is expected to keep every row in memory and discard rows found only in the using file - confirm against mmerge help;
drop unemprate povpct;
mmerge statecode year using ${econ}econind_lag.dta, t(n:1) unmatched(master);

* Rescale by 100 (presumably from shares to percent - confirm units in the merged file);
replace povpct=100*povpct;
replace emppop=100*emppop;
* Squares and logs of covariates. povpct_sq is built from the rescaled povpct, and curexp_sq and lnppexp use curexp as it stands at this point;
gen curexp_sq=curexp^2;
gen povpct_sq=povpct^2;
gen excl_sq=excl^2;
gen puptea_sq=pup_tea^2;
gen lnppexp=ln(curexp);
gen unemprate_sq=unemprate^2;

*parental ed variables;
* rsum adds the listed variables row by row, treating missing as zero;
egen pared_1=rsum(pared_lths pared_hsgrad);
label var pared_1 "HS or less";
egen pared_2=rsum(pared_sc pared_ba);
label var pared_2 "SC or more";
* pared_3 is a plain copy of pared_ba, so it stays missing where pared_ba is missing;
gen pared_3=pared_ba;
label var pared_3 "BA or more";

*Create baseline sample;
* samp is 1 for public, non-NP, non-NA rows whose cell meets the data-availability rule for its subject and grade, else 0. Rows of any other subject keep 0;
gen samp=0;
* Math, grades 4 and 8: at least two pre-2002 years of data, including 2000;
replace samp=noobspre02g>=2&obs2000==1&statecode!="NP"&statecode!="NA" & type=="Public" if subject=="Math" & inlist(grade,4,8);
* Reading, grades 4 and 8: data in both 1998 and 2002;
replace samp=obs1998==1&obs2002==1&statecode!="NP"&statecode!="NA" & type=="Public" if subject=="Reading" & inlist(grade,4,8);
* Science: grade 8 needs 1996, 2000 and 2005, grade 4 needs 2000 and 2005;
replace samp=obs1996==1&obs2000==1&obs2005==1&statecode!="NP"&statecode!="NA" & type=="Public" if subject=="Science" & grade==8;
replace samp=obs2000==1&obs2005==1&statecode!="NP"&statecode!="NA" & type=="Public" if subject=="Science" & grade==4;

***for now main results won't have 2009***;
* samp2009 keeps the flag with 2009 rows still eligible. samp itself excludes 2009;
gen samp2009=samp;
replace samp=0 if year==2009;

* Subject-by-grade indicator variables;
gen math4=grade==4&subject=="Math";
gen math8=grade==8&subject=="Math";
gen read4=grade==4&subject=="Reading";
gen read8=grade==8&subject=="Reading";
gen sci4=grade==4&subject=="Science";
gen sci8=grade==8&subject=="Science";

* 1990 rows are removed only now, after the flags above have been computed with them present;
tab year;
drop if year==1990;


*accommodations coding;
/*
For math....

prior to 2000, accommodations were not permitted.
In 2000, two different administrations - with and without ac
after 2000, accommodations always permitted
for our baseline spec, we use the 2000 admin with accommodations (ac_perm==1), so that
the 2000 data matches the later years and we do not confound implementation of nclb
with the change in accommodation status
as a robustness check, we can use the 2000 admin that did not allow accommodations

NOTE(review): the earlier wording of this comment said the baseline used the admin with
no accommodations, which contradicted the formulas below and the code. The text now
follows the code - confirm that ac_perm==1 means accommodations were permitted.

For reading....
same situation but 1998 was the switch year where both ac and no ac were given
so, baseline is with accommodations

baseline: ( (ac_perm==1 & year==2000)|(year!=2000) )  & subject=="Math"
baseline: ( (ac_perm==1 & year==1998)|(year!=1998) )  & subject=="Reading"

alt: ( (ac_perm==0 & year==2000)|(year!=2000) )  & subject=="Math"
alt: ( (ac_perm==0 & year==1998)|(year!=1998) )  & subject=="Reading"

The code applies the math rule to science as well. Rows of any other subject get a
missing ac_base and ac_alt.
*/

* The variable is spelled out as subject instead of the abbreviation subj, so the condition does not depend on variable-name abbreviation being enabled and unambiguous;
gen ac_base=( (ac_perm==1 & year==2000)|(year!=2000) ) if subject=="Math"|subject=="Science";
replace ac_base=( (ac_perm==1 & year==1998)|(year!=1998) ) if subject=="Reading";

gen ac_alt=( (ac_perm==0 & year==2000)|(year!=2000) ) if subject=="Math"|subject=="Science";
replace ac_alt=( (ac_perm==0 & year==1998)|(year!=1998) ) if subject=="Reading";

compress;

/*Label variables*/
/* NOTE(review): no labelling commands are present yet. Add them here, before the save
   below, so they end up in the saved file. */

/*NOTE: This is the dataset used for analysis -- save as "tmpall.dta".
  The file used to be saved twice in a row with nothing in between. One save is enough. */
save ${data}tmpall.dta, replace;

log close;


/*Now, call up all other do files -- the following do files create all tables and figures presented in the NBER paper, the Brookings paper
and the final JPAM paper.
NOTE(review): these do files are called by bare name, so they are looked up relative to the
current working directory, unlike math2009.do which is called through the do_files global - confirm
the script is always launched from the do-file directory.*/

*national trend figures;
do nclb_figs1_10_UPDATE.do;

*trend line figures;
do nclb_fig_UPDATE.do;
do nclb_tab7figs.do;
do nclb_fig2009.do;

*cath/pub appendix figs;
do pub_cath2_UPDATE.do;

*summary stats;
do nclb_tab1.do;

*main table of results;
do nclb_tab3.do;
*2009 math results;
do nclb_tab3_2009.do;
*science results;
do nclb_tab3sci.do;
*dropping hawaii;
do nclb_tab3noHI.do;

*specifications using state vs. naep prof. results;
do nclb_tab3b.do;

*robustness specifications (new edits by EW on 11.14.10);
do nclb_tab4_v3.do;

*falsification tests;
do nclb_tab5.do;

*panel A B and C results;
do nclb_tab5abc.do;
*look for outliers by state in unemp results;
do nclb_unemp_outliers.do;

*main specs, multiple outcomes;
do nclb_tab6.do;
do nclb_tab6_nclb04.do;
*include state-year covariates;
do nclb_tab6cov.do;

*student subgroups, multiple outcomes;
do nclb_tab7.do;
do nclb_tab7_nclb04.do;
*science results;
do nclb_tab7sci.do;
*dropping <1998;
do nclb_tab7_98_07.do;
*state-year covariates;
do nclb_tab7cov.do;

*subscale scores;
do nclb_tab11.do;

*subscale scores, multiple outcomes;
do nclb_tab12.do;




