
/***********************************************************************************************
This do file combines datasets from all SASS surveys.
***********************************************************************************************/

/*Session setup: semicolon-delimited commands, a clean workspace, and the data folder*/
# delimit;
capture log close;
/*_all is the keyword that drops every program in memory. A bare "all" refers to one
  program that happens to be called all, and capture silently hid the resulting error*/
capture program drop _all;
clear;
clear matrix;
set more off;
set mem 2000M;

/*Folder that holds the input datasets. Every dataset this file saves is written here too*/
global data "C:/Users/nschwart/Desktop/nclb_final/sass_folder/raw_data/";

/******************TEACHER DATA*****************************/

/*Combine all survey years: start from the 9394 file and stack the later waves under it*/
use "${data}cleaned_9394.dta";
foreach wave in 9900 0304 0708 {;
  append using "${data}cleaned_`wave'.dta";
};

/*Build a state-name string from the numeric code in STATE (renamed FIPS_code).
  Each list element is a code followed by the name that code maps to.
  Codes that are not listed keep the empty string*/
rename STATE FIPS_code;
gen state="";
foreach pair in
 "1 Alabama" "2 Alaska" "4 Arizona" "5 Arkansas" "6 California" "8 Colorado"
 "9 Connecticut" "10 Delaware" "11 Washington D.C." "12 Florida" "13 Georgia"
 "15 Hawaii" "16 Idaho" "17 Illinois" "18 Indiana" "19 Iowa" "20 Kansas"
 "21 Kentucky" "22 Louisiana" "23 Maine" "24 Maryland" "25 Massachusetts"
 "26 Michigan" "27 Minnesota" "28 Mississippi" "29 Missouri" "30 Montana"
 "31 Nebraska" "32 Nevada" "33 New Hampshire" "34 New Jersey" "35 New Mexico"
 "36 New York" "37 North Carolina" "38 North Dakota" "39 Ohio" "40 Oklahoma"
 "41 Oregon" "42 Pennsylvania" "44 Rhode Island" "45 South Carolina"
 "46 South Dakota" "47 Tennessee" "48 Texas" "49 Utah" "50 Vermont"
 "51 Virginia" "53 Washington" "54 West Virginia" "55 Wisconsin" "56 Wyoming" {;
  /*Split the pair into the leading code and the remaining name text*/
  gettoken code name : pair;
  local name = trim("`name'");
  replace state="`name'" if FIPS_code==`code';
};

/*Remove SCHLEVE2 from the teacher file. The variable is used again further down,
  so it is presumably supplied by the school file merged in next - TODO confirm*/
drop SCHLEVE2;

/*Merge in school data on CNTLNUMS and survey_year, then discard every record that
  mmerge flags with _merge equal to 1 or 2*/
mmerge CNTLNUMS survey_year using "${data}s_allyears.dta";
drop if inlist(_merge, 1, 2);
drop _merge;

/*Drop outliers by setting implausible values to missing*/
replace avg_class=. if avg_class>50;
replace hrs_per_wk=. if hrs_per_wk<20 | hrs_per_wk>50;
*replace days=. if days<150 | days>260;

/*Cap each self-contained subject at 35 hours*/
foreach subj in eng math ss sci {;
  replace sc_hrs_`subj'=. if sc_hrs_`subj'>35;
};

/*Total hours over the four subjects. When the total exceeds 40 all four components
  are blanked and the total is rebuilt from what is left.
  NOTE(review): academic_time>40 is also true when academic_time is missing, so a
  missing value in any one subject blanks the other three as well - confirm intended*/
gen academic_time = (sc_hrs_eng+sc_hrs_math+sc_hrs_sci+sc_hrs_ss);
foreach subj in eng math ss sci {;
  replace sc_hrs_`subj'=. if academic_time>40;
};
replace academic_time = (sc_hrs_eng+sc_hrs_math+sc_hrs_sci+sc_hrs_ss);

/*The four-subject total cannot exceed total weekly hours*/
replace academic_time = . if academic_time>hrs_per_wk;
gen math_ELA_time = (sc_hrs_math+sc_hrs_eng);

/*Generate some additional variables.
  Names ending in time_ratio are shares of hrs_per_wk.
  Names ending in ratio_alt are shares of academic_time*/
gen academ_time_ratio = academic_time/hrs_per_wk;
gen math_ELA_time_ratio = math_ELA_time/hrs_per_wk;
gen math_ELA_ratio_alt = math_ELA_time/academic_time;
gen math_time_ratio = sc_hrs_math/hrs_per_wk;
gen math_ratio_alt = sc_hrs_math/academic_time;
gen ELA_time_ratio = sc_hrs_eng/hrs_per_wk;
gen ELA_ratio_alt = sc_hrs_eng/academic_time;
gen ss_ratio_alt = sc_hrs_ss/academic_time;
gen sci_ratio_alt = sc_hrs_sci/academic_time;

/*Math plus ELA as a share of the four subjects, and math as a share of math plus ELA*/
gen core_subj_hours = (sc_hrs_eng+sc_hrs_math)/(sc_hrs_eng+sc_hrs_math+sc_hrs_sci+sc_hrs_ss);
gen math_vs_ELA = (sc_hrs_math)/(sc_hrs_math+sc_hrs_eng);

/*Equals 1 exactly when self_contained is 0. A missing self_contained gives 0*/
gen departmentalized = (self_contained==0);
*gen hrs_per_yr = (days/5)*hrs_per_wk;

/*Shorter names for the two subject-hour variables used most often*/
rename sc_hrs_eng ELA_time;
rename sc_hrs_math math_time;

/*Merge in consequential accountability data*/
mmerge state using "${data}ca_data.dta";

/*Generate assignment sample of only math and ELA/reading teachers.
  ASSIGN03 codes 1, 4 and 8 are the ones kept*/
gen assign_sample=0;
replace assign_sample=1 if ASSIGN03==1 | ASSIGN03==4 | ASSIGN03==8;

/*Free-lunch samples.
  Stata treats a missing value as larger than any number, so a bare pct_lunch>20
  or pct_lunch>50 is true for schools whose lunch share is unknown. The extra
  pct_lunch<. condition keeps those schools out of the high-lunch samples.
  A school with missing pct_lunch therefore has all three flags equal to 0*/
gen lunch20_sample=0;
replace lunch20_sample=1 if pct_lunch>20 & pct_lunch<.;
gen lunch50_or_more_sample=0;
replace lunch50_or_more_sample=1 if pct_lunch>50 & pct_lunch<.;
gen lunch50_or_less_sample=0;
replace lunch50_or_less_sample=1 if pct_lunch<=50;

/*Generate teacher-grade-combination covariates.
  grades_taught joins the 13 teacher grade variables, kindergarten first, into one string.
  school_span does the same for the 13 school grade variables.
  Later code compares both against 13-character strings of 0s and 1s, so the
  inputs are presumably 0/1 indicators - TODO confirm*/
egen grades_taught=concat(kind first second third fourth fifth sixth seventh eighth ninth tenth eleventh twelfth);
egen school_span=concat(s_kind s_first s_second s_third s_fourth s_fifth s_sixth s_seventh s_eighth s_ninth 
 s_tenth s_eleventh s_twelfth);

/*Generate assignment dummies from ASSIGN03. Each is 1 for the stated code and 0 otherwise*/
gen math = (ASSIGN03==8);
gen ela = (ASSIGN03==4);
gen genelem = (ASSIGN03==1);

/*School-level samples from SCHLEVE2: 1 is primary, 2 is middle, 3 is high*/
local level = 0;
foreach s in primary middle high {;
  local ++level;
  gen `s'_sample = (SCHLEVE2==`level');
};

/*Generate covariates and treatment variables for regressions.
  NOTE(review): the principal section of this file builds the same variables with
  different constants (2002 where this section has 2001, survey_year+1 where this
  section has survey_year+2, and a strict > where this section has >=).
  Confirm that the differences are intended*/

/*TREAT is 1 when yearca is after 2001 or missing*/
gen TREAT = yearca>2001 | yearca==.;
gen year = survey_year - 1988;
gen nclb = (survey_year>=2002);

/*Separate indicators for the 2003 and 2007 survey years*/
gen nclb2003 = (survey_year==2003);
gen nclb2007 = (survey_year==2007);

/*Survey years elapsed since 2001, floored at zero*/
gen yr_since_nclb = survey_year - 2001;
replace yr_since_nclb = 0 if yr_since_nclb<0;
gen TREAT_year = TREAT*year;
gen TREAT_nclb = TREAT*nclb;
gen TREAT_yr_since = TREAT*yr_since_nclb;

/*Generate sample that will exclude states that adopted consequential accountability in particular years*/
gen exca9801 = inlist(yearca, 1998, 1999, 2000, 2001);
gen exca9901 = inlist(yearca, 1999, 2000, 2001);

/*Generate continuous treatment measure: yearca minus 1992, or 11 when yearca is missing*/
gen NEWTREAT = cond(yearca==., 11, yearca-1992);
gen NEWTREAT_year = NEWTREAT*year;
gen NEWTREAT_nclb = NEWTREAT*nclb;
gen NEWTREAT_yr_since = NEWTREAT*yr_since_nclb;

/*Generate dummies for DD specification: each treatment measure times each year indicator*/
foreach t in NEWTREAT TREAT {;
  foreach yr in 2003 2007 {;
    gen `t'_nclb`yr' = `t'*nclb`yr';
  };
};

/*Generate alternative treatment variable called "ca_treat" that turns on whenever state has CA.
  NOTE(review): the first condition is an exact equality, so before 2002 it is only
  true in a survey year that is exactly one less than yearca. Confirm that this
  matches the stated intent*/
gen ca_treat = (yearca-1==survey_year) | (survey_year>=2002);
gen ca_treat2003 = ca_treat*nclb2003;
gen ca_treat2007 = ca_treat*nclb2007;

/*Generate alternative treatment variable called "yearsca_treat" that measures number of years of CA.
  With yearca present it is survey_year+2 minus yearca, otherwise survey_year minus 2001.
  Both versions are floored at zero*/
gen yearsca_treat = cond(yearca==., max(0, survey_year - 2001), max(0, survey_year+2 - yearca));
gen yearsca_treat_sq = yearsca_treat^2;

/*Generate year dummies (yr1, yr2, ...) from survey_year. The enrollment rename is disabled*/
tab survey_year, gen(yr);
*rename enrollment state_enrollment;

/*Flag records with missing pct_lunch, then set the missing value itself to zero*/
gen pct_lunch_flag = (pct_lunch==.);
replace pct_lunch = 0 if pct_lunch==.;

/*More covariates*/
/*Generate squared, cubic, quartic, and interaction terms for variables*/
gen school_enroll_hun = school_enroll/100;
label variable school_enroll_hun "School Enrollment (100s)";
foreach v in AGE_T pct_min pct_lunch school_enroll_hun {;
  /*Each element gives the name suffix followed by the label wording. The power starts at 2*/
  local power = 1;
  foreach term in "sq squared" "cu cubed" "qu to the 4th" {;
    local ++power;
    gettoken suffix wording : term;
    local wording = trim("`wording'");
    gen `v'_`suffix' = `v'^`power';
    label variable `v'_`suffix' "`v' `wording'";
  };
};

/*Interactions: minority share with lunch share, and the age polynomial with male*/
gen pct_min_lunch = pct_min*pct_lunch;
gen AGE_T_male = AGE_T*male;
foreach suffix in sq cu qu {;
  gen AGE_T_`suffix'_male = AGE_T_`suffix'*male;
};

/*Generate composite survey measures that combine several survey questions into one total measure.
  Each composite is a plain sum, so it is missing whenever any component is missing*/
gen sch_disc=ad_prin_disc+ad_tch_disc;
gen stu_cult=p_tardiness+p_absent+p_classcut+p_dropout+p_apathy+p_preparation;
gen resource=ad_mater_ad;

/*Merge in state economic indicator data and discard rows flagged _merge 2*/
mmerge state survey_year using "${data}econind_lag.dta", t(n:1);
drop if _merge==2;

/*Multiply the two economic indicators by 100*/
foreach v in povpct emppop {;
  replace `v'=`v'*100;
};

/*Missing race shares are set to zero*/
foreach v in pct_black pct_hisp {;
  replace `v'=0 if `v'==.;
};
drop _merge;

/*Drop schools that are more than 80% special education.
  NOTE(review): pct_iep>80 is also true when pct_iep is missing, so records with
  missing pct_iep are dropped here as well - confirm intended*/
drop if pct_iep>80;

/*Save teacher dataset*/
save "${data}final_teachers.dta", replace;
clear;

/****************************PRINCIPAL DATASET*********************/

/*Load the principal file and remove any _merge variable left in it*/
use "${data}p_allyears.dta";
capture drop _merge;

/*Build a state-name string from the numeric code in STATE (renamed FIPS_code).
  Each list element is a code followed by the name that code maps to.
  Codes that are not listed keep the empty string*/
rename STATE FIPS_code;
gen state="";
foreach pair in
 "1 Alabama" "2 Alaska" "4 Arizona" "5 Arkansas" "6 California" "8 Colorado"
 "9 Connecticut" "10 Delaware" "11 Washington D.C." "12 Florida" "13 Georgia"
 "15 Hawaii" "16 Idaho" "17 Illinois" "18 Indiana" "19 Iowa" "20 Kansas"
 "21 Kentucky" "22 Louisiana" "23 Maine" "24 Maryland" "25 Massachusetts"
 "26 Michigan" "27 Minnesota" "28 Mississippi" "29 Missouri" "30 Montana"
 "31 Nebraska" "32 Nevada" "33 New Hampshire" "34 New Jersey" "35 New Mexico"
 "36 New York" "37 North Carolina" "38 North Dakota" "39 Ohio" "40 Oklahoma"
 "41 Oregon" "42 Pennsylvania" "44 Rhode Island" "45 South Carolina"
 "46 South Dakota" "47 Tennessee" "48 Texas" "49 Utah" "50 Vermont"
 "51 Virginia" "53 Washington" "54 West Virginia" "55 Wisconsin" "56 Wyoming" {;
  /*Split the pair into the leading code and the remaining name text*/
  gettoken code name : pair;
  local name = trim("`name'");
  replace state="`name'" if FIPS_code==`code';
};

/*Merge in school data and discard every record that mmerge flags with _merge 1 or 2*/
mmerge CNTLNUMS survey_year using "${data}s_allyears.dta";
drop if _merge==1 | _merge==2;

/*Drop schools that are more than 80% special education.
  NOTE(review): pct_iep>80 is also true when pct_iep is missing, so records with
  missing pct_iep are dropped here as well - confirm intended*/
drop if pct_iep>80;

/*Generate grade-combo variable: the 13 school grade variables joined into one string,
  kindergarten first*/
egen school_span=concat(s_kind s_first s_second s_third s_fourth s_fifth s_sixth s_seventh s_eighth s_ninth 
 s_tenth s_eleventh s_twelfth);

/*Generate samples based on percentage free lunch students in school.
  Stata treats a missing value as larger than any number, so a bare pct_lunch>20
  or pct_lunch>50 is true for schools whose lunch share is unknown. The extra
  pct_lunch<. condition keeps those schools out of the high-lunch samples.
  A school with missing pct_lunch therefore has all three flags equal to 0*/
gen lunch20_sample=0;
replace lunch20_sample=1 if pct_lunch>20 & pct_lunch<.;
gen lunch50_or_more_sample=0;
replace lunch50_or_more_sample=1 if pct_lunch>50 & pct_lunch<.;
gen lunch50_or_less_sample=0;
replace lunch50_or_less_sample=1 if pct_lunch<=50;

/*Generate samples based on school level: SCHLEVE2 1 is primary, 2 is middle, 3 is high*/
gen primary_sample=0;
replace primary_sample=1 if SCHLEVE2==1;
gen middle_sample=0;
replace middle_sample=1 if SCHLEVE2==2;
gen high_sample=0;
replace high_sample=1 if SCHLEVE2==3;

/*Merge in consequential accountability data*/
mmerge state using "${data}ca_data.dta";

/*Generate covariates and treatment variables for regressions.
  NOTE(review): the teacher section of this file builds the same variables with
  different constants (2001 where this section has 2002, survey_year+2 where this
  section has survey_year+1, and >= where this section has a strict >).
  Confirm that the differences are intended*/

/*TREAT is 1 when yearca is after 2001 or missing*/
gen TREAT = yearca>2001 | yearca==.;
gen year = survey_year - 1988;
gen nclb = (survey_year>=2002);

/*Separate indicators for the 2003 and 2007 survey years*/
gen nclb2003 = (survey_year==2003);
gen nclb2007 = (survey_year==2007);

/*Survey years elapsed since 2002, floored at zero*/
gen yr_since_nclb = survey_year - 2002;
replace yr_since_nclb = 0 if yr_since_nclb<0;
gen TREAT_year = TREAT*year;
gen TREAT_nclb = TREAT*nclb;
gen TREAT_yr_since = TREAT*yr_since_nclb;

/*Indicators for states whose yearca falls in 1998-2001 and in 1999-2001*/
gen exca9801 = inlist(yearca, 1998, 1999, 2000, 2001);
gen exca9901 = inlist(yearca, 1999, 2000, 2001);

/*Generate continuous treatment measure: yearca minus 1992, or 11 when yearca is missing*/
gen NEWTREAT = cond(yearca==., 11, yearca-1992);
gen NEWTREAT_year = NEWTREAT*year;
gen NEWTREAT_nclb = NEWTREAT*nclb;
gen NEWTREAT_yr_since = NEWTREAT*yr_since_nclb;

/*Generate dummies for DD specification: each treatment measure times each year indicator*/
foreach t in NEWTREAT TREAT {;
  foreach yr in 2003 2007 {;
    gen `t'_nclb`yr' = `t'*nclb`yr';
  };
};

/*Generate alternative treatment variable called "ca_treat" that turns on whenever state has CA.
  NOTE(review): the first condition is an exact equality, so it is only true in a
  survey year that is exactly one less than yearca. Confirm that this matches the
  stated intent*/
gen ca_treat = (yearca-1==survey_year) | (survey_year>2002);
gen ca_treat2003 = ca_treat*nclb2003;
gen ca_treat2007 = ca_treat*nclb2007;

/*Generate alternative treatment variable called "yearsca_treat" that measures number of years of CA.
  With yearca present it is survey_year+1 minus yearca, otherwise survey_year minus 2002.
  Both versions are floored at zero*/
gen yearsca_treat = cond(yearca==., max(0, survey_year - 2002), max(0, survey_year+1 - yearca));
gen yearsca_treat_sq = yearsca_treat^2;

/*Generate year dummies (yr1, yr2, ...) from survey_year. The enrollment rename is disabled*/
tab survey_year, gen(yr);
*rename enrollment state_enrollment;

/*Flag records with missing pct_lunch, then set the missing value itself to zero*/
gen pct_lunch_flag = (pct_lunch==.);
replace pct_lunch = 0 if pct_lunch==.;

/*More covariates*/
/*Generate squared, cubic, quartic, and interaction terms for variables*/
gen school_enroll_hun = school_enroll/100;
label variable school_enroll_hun "School Enrollment (100s)";
foreach v in p_age pct_min pct_lunch school_enroll_hun {;
  /*Each element gives the name suffix followed by the label wording. The power starts at 2*/
  local power = 1;
  foreach term in "sq squared" "cu cubed" "qu to the 4th" {;
    local ++power;
    gettoken suffix wording : term;
    local wording = trim("`wording'");
    gen `v'_`suffix' = `v'^`power';
    label variable `v'_`suffix' "`v' `wording'";
  };
};

/*Interactions: minority share with lunch share, and the age polynomial with p_male*/
gen pct_min_lunch = pct_min*pct_lunch;
gen p_age_male = p_age*p_male;
foreach suffix in sq cu qu {;
  gen p_age_`suffix'_male = p_age_`suffix'*p_male;
};

/*Merge in state economic indicator data and discard rows flagged _merge 2*/
mmerge state survey_year using "${data}econind_lag.dta", t(n:1);
drop if _merge==2;

/*Multiply the two economic indicators by 100*/
foreach v in povpct emppop {;
  replace `v'=`v'*100;
};

/*Missing race shares are set to zero*/
foreach v in pct_black pct_hisp {;
  replace `v'=0 if `v'==.;
};
drop _merge;

/*Save principal dataset*/
save "${data}final_principals.dta", replace;
clear;

/*****************************CREATE SEPARATE SAMPLES**************************/
/*This next section divides data up into a series of datasets based on the school level (elementary, middle, high, or both elementary and middle)
and based on free lunch percentage (greater or less than 50% free lunch). It also renames the teacher-grade and school-grade covariates to make
them slightly easier to use (this part is what makes the following code a little unwieldy).*/

/*Teacher samples.
  Eight datasets are written: one per school level (elem, middle, high, elmid) for
  all schools, and the same four again restricted to lunch50_or_more_sample.
  Every dataset gets teacher grade dummies (grade_combo*), school span dummies
  (s_grade_combo*), an "other" dummy for each family, and num_obs.

  The "other" dummies are 1 only when none of the listed dummies in the family is 1.
  Earlier code for the elem and elmid samples joined the school-span conditions with
  a stray | in place of one &. Because & binds tighter than |, schools with a listed
  span (for example K-5) were flagged as "other" as well. All conditions are now
  joined with &*/
use "${data}final_teachers.dta";

/*Teacher grade patterns by level, written as name:pattern. The pattern is the value
  of grades_taught that switches the dummy on*/
local tg_elem   "0:1000000000000 1:0100000000000 2:0010000000000 3:0001000000000 4:0000100000000 5:0000010000000 6:0000001000000";
local tg_middle "6:0000001000000 7:0000000100000 8:0000000010000 78:0000000110000";
local tg_high   "9101112:0000000001111 101112:0000000000111 9:0000000001000 1112:0000000000011 910:0000000001100 91011:0000000001110 10:0000000000100";
local tg_elmid  "`tg_elem' 7:0000000100000 8:0000000010000 78:0000000110000";

/*School span patterns by level, written as name:pattern. The pattern is the value
  of school_span that switches the dummy on*/
local sg_elem   "k8:1111111110000 k6:1111111000000 k5:1111110000000 k4:1111100000000 k3:1111000000000 k2:1110000000000";
local sg_middle "78:0000000110000 678:0000001110000";
local sg_high   "712:0000000111111 79:0000000111000 912:0000000001111 1012:0000000000111";
local sg_elmid  "`sg_elem' `sg_middle'";

/*Row filter that defines each level*/
local keep_elem   "primary_sample==1";
local keep_middle "middle_sample==1";
local keep_high   "high_sample==1";
local keep_elmid  "(middle_sample==1 | primary_sample==1)";

/*highpov 0 writes the all-school files. highpov 1 writes the lunch50ormore files*/
forvalues highpov = 0/1 {;
  local prefix "";
  local lunchcond "";
  if `highpov'==1 {;
    local prefix "lunch50ormore_";
    local lunchcond "lunch50_or_more_sample==1 & ";
  };

  foreach level in elem middle high elmid {;
    preserve;
    keep if `lunchcond'assign_sample==1 & `keep_`level'';

    /*Teacher grade dummies. The macro none collects one ==0 test per dummy*/
    local none "";
    foreach pair of local tg_`level' {;
      gettoken nm rest : pair, parse(":");
      gettoken sep pat : rest, parse(":");
      gen grade_combo`nm' = (grades_taught=="`pat'");
      local none "`none' & grade_combo`nm'==0";
    };
    gen grade_combo_other = (1`none');

    /*School span dummies, built the same way*/
    local none "";
    foreach pair of local sg_`level' {;
      gettoken nm rest : pair, parse(":");
      gettoken sep pat : rest, parse(":");
      gen s_grade_combo`nm' = (school_span=="`pat'");
      local none "`none' & s_grade_combo`nm'==0";
    };
    gen s_grade_combo_other = (1`none');

    /*Number of records with a nonmissing teacher weight in each state and survey year*/
    egen num_obs=count(TFNLWGT), by(state survey_year);
    save "${data}t_`prefix'assign_`level'.dta", replace;
    restore;
  };
};

clear;

/*Principal samples.
  Eight datasets are written: one per school level (elem, middle, high, elmid) for
  all schools, and the same four again restricted to lunch50_or_more_sample.
  Every dataset gets school span dummies (s_grade_combo*), an "other" dummy, and num_obs.

  The "other" dummy is 1 only when none of the listed span dummies is 1.
  Earlier code for the elem and elmid samples joined the conditions with a stray |
  in place of one &. Because & binds tighter than |, schools with a listed span
  (for example K-5) were flagged as "other" as well. All conditions are now joined
  with &*/
use "${data}final_principals.dta";

/*School span patterns by level, written as name:pattern. The pattern is the value
  of school_span that switches the dummy on*/
local sg_elem   "k8:1111111110000 k6:1111111000000 k5:1111110000000 k4:1111100000000 k3:1111000000000 k2:1110000000000";
local sg_middle "78:0000000110000 678:0000001110000";
local sg_high   "712:0000000111111 79:0000000111000 912:0000000001111 1012:0000000000111";
local sg_elmid  "`sg_elem' `sg_middle'";

/*Row filter that defines each level*/
local keep_elem   "primary_sample==1";
local keep_middle "middle_sample==1";
local keep_high   "high_sample==1";
local keep_elmid  "(primary_sample==1 | middle_sample==1)";

/*highpov 0 writes the all-school files. highpov 1 writes the lunch50ormore files*/
forvalues highpov = 0/1 {;
  local prefix "";
  local lunchcond "";
  if `highpov'==1 {;
    local prefix "lunch50ormore_";
    local lunchcond "lunch50_or_more_sample==1 & ";
  };

  foreach level in elem middle high elmid {;
    preserve;
    keep if `lunchcond'`keep_`level'';

    /*School span dummies. The macro none collects one ==0 test per dummy*/
    local none "";
    foreach pair of local sg_`level' {;
      gettoken nm rest : pair, parse(":");
      gettoken sep pat : rest, parse(":");
      gen s_grade_combo`nm' = (school_span=="`pat'");
      local none "`none' & s_grade_combo`nm'==0";
    };
    gen s_grade_combo_other = (1`none');

    /*Number of records with a nonmissing principal weight in each state and survey year*/
    egen num_obs=count(AFNLWGT), by(state survey_year);
    save "${data}p_`prefix'assign_`level'.dta", replace;
    restore;
  };
};

clear;

/******************************STANDARDIZE VARIABLES************************************/
/*This final section adds standardized versions of some variables to every teacher sample
file and combines the teacher control items into one composite. The control items are
standardized within survey year. The remaining variables are scaled by their 1993 mean and SD*/

foreach f in assign_elem assign_middle assign_high assign_elmid
 lunch50ormore_assign_elem lunch50ormore_assign_middle lunch50ormore_assign_high lunch50ormore_assign_elmid {;

  use "${data}t_`f'.dta";

    /*TEACHER AUTONOMY*/
    /*Each control item is standardized with the weighted mean and SD of its own survey year*/
    global t_autonomy "control_mater control_cont control_tech control_eval control_disc control_hw";
    foreach t of global t_autonomy {;
      gen z_`t' = .;
      label var z_`t' "standardized (within year) version of variable";
      foreach y in 1993 1999 2003 2007 {;
        sum `t' if survey_year==`y' [w=TFNLWGT];
        local mu = r(mean);
        local sigma = r(sd);
        replace z_`t' = (`t' - `mu')/(`sigma') if survey_year==`y';
      };
    };

    /*Simple average of the six standardized items*/
    gen autonomy = (z_control_mater + z_control_cont + z_control_tech + z_control_eval + z_control_disc + z_control_hw)/6;
    label var autonomy "composite of teacher control questions";
    /*NOTE: A high value on this composite indicates that teacher has a great deal of control in the areas of interest*/

    /*Standardized Variables*/
    /*The weighted mean and SD are taken from the 1993 records only and are then
      applied to the records of every survey year*/
    foreach v in sch_disc stu_cult resource p_tardiness p_absent p_classcut p_dropout p_apathy p_preparation {;
      gen z_`v' = .;
      sum `v' if survey_year==1993 [w=TFNLWGT];
      replace z_`v' = (`v' - r(mean))/r(sd);
    };
    /*Note: For stu_cult, a high value indicates that tardiness, absenteeism, etc. are LESS of a problem --
    in other words, a high value means BETTER engagement. This is what we want.
    For sch_disc, a high value indicates that the teacher DISAGREES that teachers or principal enforce discipline --
    in other words, a high value means WORSE discipline. This is NOT what we want -- so the next step is to reverse
    the sign on this variable*/
    replace z_sch_disc = -z_sch_disc;

    /*Write the file back over itself with the new variables added*/
    save "${data}t_`f'.dta", replace;

};
