
/***********************************************************************************************
This do file cleans the 03-04 SASS data
***********************************************************************************************/

# delimit;

/*Session setup: every command from here on is terminated by a semicolon*/
capture log close;

/*_all is the keyword that drops every program in memory.  The bare word all
  would be read as a program name, and capture would hide the resulting error.*/
capture program drop _all;
clear;
clear matrix;
set mem 500M;

/*Folder that holds the raw input file and receives the cleaned output*/
global data "C:/Users/nschwart/Desktop/nclb_final/sass_folder/raw_data/";

/*Load the raw teacher file*/
use "${data}pubtea03.dta";

/*Rename relevant variables (grouped in questionnaire item order)*/

rename T0026 assign;

/*Items T0051 through T0064 are consecutive and map onto the grade names in order*/
local item = 51;
foreach grade in pkind kind first second third fourth fifth sixth seventh
 eighth ninth tenth eleventh twelfth {;
    rename T00`item' `grade';
    local item = `item' + 1;
};
rename T0065 ungraded;
rename T0066 class_org;

/*Items prefixed sc_*/
rename T0068 sc_numstu;
rename T0070 sc_hrs_eng;
rename T0072 sc_hrs_math;
rename T0073 sc_hrs_ss;
rename T0074 sc_hrs_sci;

/*Items prefixed d_ and the ten class-size slots.
  The class-size items run T0079, T0082, ... , T0106, i.e. item 76 + 3 times the slot number.*/
rename T0076 d_numclasses;
forvalues slot = 1/10 {;
    local item = string(76 + 3*`slot', "%04.0f");
    rename T`item' class_size`slot';
};

/*Degrees, certification, hours*/
rename T0116 ba_dummy;
rename T0123 ma_dummy;
rename T0166 cert_details;
rename T0298 hrs_per_wk;

/*control_ items*/
rename T0318 control_mater;
rename T0319 control_cont;
rename T0320 control_tech;
rename T0321 control_eval;
rename T0322 control_disc;
rename T0323 control_hw;

/*ad_ items*/
rename T0331 ad_admin_supp;
rename T0332 ad_salary_sat;
rename T0333 ad_misbehav;
rename T0334 ad_par_supp;
rename T0335 ad_mater_ad;
rename T0336 ad_otherdut;
rename T0337 ad_prin_disc;
rename T0338 ad_tch_disc;
rename T0339 ad_shareval;
rename T0341 ad_coop;
rename T0342 ad_staffrec;
rename T0348 ad_tardiness;
rename T0350 ad_satisfied;

/*p_ items*/
rename T0364 p_tardiness;
rename T0365 p_absent;
rename T0366 p_classcut;
rename T0367 p_tch_absent;
rename T0369 p_dropout;
rename T0370 p_apathy;
rename T0371 p_parent;
rename T0372 p_poverty;
rename T0373 p_preparation;
rename T0374 p_stu_health;

rename T0383 stay_tch;

/*Pay items*/
rename T0399 salary;
rename T0400 add_salary_dummy;
rename T0401 add_salary_amt;
rename T0402 bonus_dummy;
rename T0403 bonus_amt;

/*Teacher background*/
rename T0407 union_dummy;
rename T0408 male;
rename T0416 yob;

rename TLEV2_03 TEALEV2;

/*Race coding.
  Each indicator is 1 for the RACETH_T codes listed, 0 for every other code,
  and is then blanked out where RACETH_T is missing or negative.*/
gen hispanic = inlist(RACETH_T, 1, 2, 4, 7, 8, 9, 16, 17, 20, 22, 24, 25, 31);
gen white_nh = RACETH_T==47;
gen black_nh = inlist(RACETH_T, 39, 40, 42, 43);
gen other = inlist(RACETH_T, 32, 33, 34, 35, 36, 37, 48, 49, 51, 53, 55, 56, 62);

label var white_nh "White, not Hispanic";
label var black_nh "Black, not Hispanic";

foreach race of varlist hispanic white_nh black_nh other {;
    replace `race' = . if RACETH_T==. | RACETH_T<0;
};

/*Sample restriction: only rows with assign equal to 1 survive, and rows with
  class_org equal to 5 are removed.  Per the original note these are the
  full-time, non-pull-out teachers.*/
drop if assign~=1;
drop if class_org==5;

/*assign is constant after the restriction, so it is no longer needed*/
drop assign;

/*Recode relevant variables.
  The codes -8 and -9 are never kept as real answers in this section: they become 0
  for the grade indicators and missing for everything else.*/

/*Grade indicators: missing, -8 and -9 all count as 0*/
foreach v of varlist ungraded pkind kind first second third fourth fifth sixth seventh
 eighth ninth tenth eleventh twelfth {;
    replace `v' = 0 if `v'==. | `v'==-8 | `v'==-9;
};

/*Class sizes and sc_numstu: -8, -9, 0 and values above 99 become missing*/
foreach v of varlist class_size1 class_size2 class_size3 class_size4 class_size5
 class_size6 class_size7 class_size8 class_size9 class_size10 sc_numstu {;
    replace `v' = . if `v'==-8 | `v'==-9 | `v'==0;
    replace `v' = . if `v'>99;
};

/*Everything else: -8 and -9 become missing.
  cert_details and hrs_per_wk previously tested for 8 instead of -8, which left the
  -8 code in the data and, for hrs_per_wk, erased genuine answers of 8.
  They now follow the same rule as every other variable.*/
foreach v of varlist d_numclasses sc_hrs_eng sc_hrs_math sc_hrs_ss sc_hrs_sci
 cert_details ba_dummy ma_dummy hrs_per_wk salary add_salary_dummy bonus_dummy
 control_mater control_cont control_tech control_eval control_disc control_hw
 ad_admin_supp ad_salary_sat ad_misbehav ad_par_supp ad_mater_ad ad_otherdut
 ad_prin_disc ad_tch_disc ad_shareval ad_coop ad_staffrec ad_tardiness ad_satisfied
 p_tardiness p_absent p_classcut p_tch_absent p_dropout p_apathy p_parent p_poverty
 p_preparation p_stu_health stay_tch union_dummy male class_org {;
    replace `v' = . if `v'==-8 | `v'==-9;
};

/*Turn the 1/2 coded items into 1/0 indicators*/
foreach v of varlist ba_dummy ma_dummy add_salary_dummy bonus_dummy union_dummy male CHARFLAG {;
    replace `v' = 0 if `v'==2;
};

/*Amounts: a teacher whose indicator is 0 gets an amount of 0.
  This must run after the indicator has been recoded to 1/0 and before the
  remaining -8 and -9 amounts are set to missing.*/
foreach stub in add_salary bonus {;
    replace `stub'_amt = 0 if `stub'_dummy==0;
    replace `stub'_amt = . if `stub'_amt==-8 | `stub'_amt==-9;
};

/*Collapse class organization to a 0/1 indicator and rename it.
  Codes 1 and 2 become 0, codes 3 and 4 become 1.  The order matters: the first
  recode must not produce a value that the second one would pick up.*/
replace class_org = 0 if class_org==1 | class_org==2;
replace class_org = 1 if class_org==3 | class_org==4;
label define sc 0 "departmentalized" 1 "self-contained or team-taught";
label values class_org sc;
rename class_org self_contained;

/*Generate average class sizes.
  Unused class-size slots are missing at this point (zeros and the -8 and -9 codes
  were set to missing above).  Adding the ten slots with + would therefore give a
  missing total for any teacher who does not fill all ten, so the total is built
  from the non-missing slots only and stays missing when no slot is filled.*/
gen d_avg_class = 0;
gen d_n_sizes = 0;
foreach v of varlist class_size1 class_size2 class_size3 class_size4 class_size5
 class_size6 class_size7 class_size8 class_size9 class_size10 {;
    replace d_avg_class = d_avg_class + `v' if `v'<.;
    replace d_n_sizes = d_n_sizes + 1 if `v'<.;
};

/*Divide the total by the reported number of classes.
  A missing or zero d_numclasses yields a missing average.*/
replace d_avg_class = d_avg_class/d_numclasses;
replace d_avg_class = . if d_n_sizes==0;
drop d_n_sizes;

/*Self-contained teachers use sc_numstu, departmentalized teachers use the average
  above, and anyone with a missing self_contained value stays missing*/
gen avg_class = cond(self_contained==1, sc_numstu, cond(self_contained==0, d_avg_class, .));

/*Drop ungraded teachers: rows flagged ungraded for which none of the
  fourteen grade indicators equals 1*/
drop if ungraded==1 & !inlist(1, pkind, kind, first, second, third, fourth, fifth,
 sixth, seventh, eighth, ninth, tenth, eleventh, twelfth);

/*Generate certification variables.
  Each is a 0/1 indicator taken from cert_details.  A missing cert_details gives 0
  in all three because a missing value matches none of the codes.*/
gen reg_cert = cert_details==1;
gen uncert = cert_details==6;
gen other_cert = inlist(cert_details, 2, 3, 4, 5);

/*Convert STATE from string to numeric in place, then attach postal abbreviations*/
destring STATE, replace;

/*NOTE(review): code 43 is labelled PR here.  Confirm against the survey codebook
  that 43 is the code actually used for that value.*/
label define states
 1 "AL"  2 "AK"  4 "AZ"  5 "AR"  6 "CA"  8 "CO"  9 "CT" 10 "DE" 11 "DC" 12 "FL"
13 "GA" 15 "HI" 16 "ID" 17 "IL" 18 "IN" 19 "IA" 20 "KS" 21 "KY" 22 "LA" 23 "ME"
24 "MD" 25 "MA" 26 "MI" 27 "MN" 28 "MS" 29 "MO" 30 "MT" 31 "NE" 32 "NV" 33 "NH"
34 "NJ" 35 "NM" 36 "NY" 37 "NC" 38 "ND" 39 "OH" 40 "OK" 41 "OR" 42 "PA" 43 "PR"
44 "RI" 45 "SC" 46 "SD" 47 "TN" 48 "TX" 49 "UT" 50 "VT" 51 "VA" 53 "WA" 54 "WV"
55 "WI" 56 "WY";
label values STATE states;

/*Constant tag for this survey wave*/
gen survey_year = 2003;

/*School-level indicators: 1 when any grade indicator in the group equals 1, else 0.
  pkind is not part of any group.*/
gen elem_level = inlist(1, kind, first, second, third, fourth, fifth);
gen middle_level = inlist(1, sixth, seventh, eighth);
gen high_level = inlist(1, ninth, tenth, eleventh, twelfth);

/*Write the cleaned file next to the raw data, overwriting any earlier copy*/
save "${data}cleaned_0304.dta", replace;
