* Use the semicolon as the command delimiter for the rest of this file
# delimit ;
* Close any log that is still open (errors ignored), then start a fresh log
  that overwrites the previous one for this program;
capture log close;
log using d:\statprog\dropout\inputall1.log, replace;

****************

INPUTALL1

****************

This program inputs data from several datasets (NELS, ELS, HSB, NLSY79 and NLSY97) 
to compile a large sample of high school students and follow them through
the process of dropping out or graduating from high school;

* NELS extract: tag the rows with their origin, set yrsoph to 1990, and build
  smpwgt2 as the sampling weight divided by its sample mean.  This section
  creates the pooled file alldata.dta that later sections append to;
use d:\data\nces\nels\dropout\nelsdata.dta, clear;

generate source = "nels";
generate yrsoph = 1990;
egen mnwgt = mean(smpwgt);
generate smpwgt2 = smpwgt / mnwgt;

* Retain only the harmonized variables (mnwgt is discarded here);
keep source studentid yrsoph
     female whitenh hispanic blacknh othernh age bothpar
     ftr* mtr*
     stname stfips
     smpwgt smpwgt2
     diplomaby20 gedby20 nodegreeby20 dipgedby20 educoutby20
     mathstdsc readstdsc histstdsc scistdsc
     colgrad;

tabulate source;

save d:\data\nces\alldata.dta, replace;
* ELS extract: same treatment as the other sources, with yrsoph set to 2002.
  The keep list here has no histstdsc, scistdsc or colgrad;
use d:\data\nces\els\dropout\elsdata.dta, clear;

generate source = "els";
generate yrsoph = 2002;
egen mnwgt = mean(smpwgt);
generate smpwgt2 = smpwgt / mnwgt;

keep source studentid yrsoph
     female whitenh hispanic blacknh othernh age bothpar
     ftr* mtr*
     stname stfips
     smpwgt smpwgt2
     diplomaby20 gedby20 nodegreeby20 dipgedby20 educoutby20
     mathstdsc readstdsc;

tabulate source;

* Stack the pooled file underneath these rows and write it back;
append using d:\data\nces\alldata.dta;
save d:\data\nces\alldata.dta, replace;

* HSB extract: the weight variable is called weight in this file, so rename it
  to smpwgt to match the other sources, then tag the rows, set yrsoph to 1980
  and build the mean-normalized weight smpwgt2;
clear;

use d:\data\nces\hsb\dropout\hsball.dta;

rename weight smpwgt;
gen source = "hsb";
gen yrsoph = 1980;
egen mnwgt = mean(smpwgt);
gen smpwgt2 = smpwgt/mnwgt;

* mnwgt is only a scratch variable.  The other sources discard it through
  their keep lists, so drop it here too, otherwise it ends up in the pooled
  file filled in for the hsb rows only;
drop mnwgt;

* NOTE(review): unlike the other sources no keep list is applied here, so
  every remaining variable in hsball.dta is carried into alldata.dta -
  confirm that this is intended;

tab source;

append using d:\data\nces\alldata.dta;
save d:\data\nces\alldata.dta, replace;

* NLSY extract (source tag "nlsy"): yrsoph is derived as yrage20 minus 4.
  This source keeps hgcage20, colgrad, afqt and afqtadj instead of the
  standardized test score variables kept for NELS and ELS;
use d:\data\nlsy\dropout\nlsydata.dta, clear;

generate source = "nlsy";
generate yrsoph = yrage20 - 4;
egen mnwgt = mean(smpwgt);
generate smpwgt2 = smpwgt / mnwgt;

keep source studentid yrsoph
     female whitenh hispanic blacknh othernh age bothpar
     ftr* mtr*
     stname stfips
     smpwgt smpwgt2
     diplomaby20 gedby20 nodegreeby20 dipgedby20 educoutby20
     hgcage20 colgrad afqt afqtadj;

tabulate source;

* Stack the pooled file underneath these rows and write it back;
append using d:\data\nces\alldata.dta;
save d:\data\nces\alldata.dta, replace;

* NLSY97 extract (source tag "nlsy97"): same recipe and keep list as the
  "nlsy" section, with yrsoph again derived as yrage20 minus 4;
use d:\data\nlsy97\dropout\nlsydata.dta, clear;

generate source = "nlsy97";
generate yrsoph = yrage20 - 4;
egen mnwgt = mean(smpwgt);
generate smpwgt2 = smpwgt / mnwgt;

keep source studentid yrsoph
     female whitenh hispanic blacknh othernh age bothpar
     ftr* mtr*
     stname stfips
     smpwgt smpwgt2
     diplomaby20 gedby20 nodegreeby20 dipgedby20 educoutby20
     hgcage20 colgrad afqt afqtadj;

tabulate source;

* After this append the data in memory and alldata.dta hold all five sources;
append using d:\data\nces\alldata.dta;
save d:\data\nces\alldata.dta, replace;

* Check the source composition of the pooled sample;
tabulate source;

* Mother's education category.  When more than one indicator is set the
  highest category wins (anycol over hsgrad over hsdrop), and the category is
  missing when none of the three indicators equals 1;
generate mtredcat = cond(mtranycol == 1, 3,
                    cond(mtrhsgrad == 1, 2,
                    cond(mtrhsdrop == 1, 1, .)));

label define mtredlbl 1 "mtr hsdrop" 2 "mtr hsgrad" 3 "mtr anycol";
label values mtredcat mtredlbl;

* State inequality groups as 0/1 dummies.  Each list has 13 states and is
  split over two inlist() calls because inlist() takes only a limited number
  of string arguments;
generate highineq = inlist(stname, "DC", "LA", "AL", "NY", "GA", "MS", "KY")
                  | inlist(stname, "MA", "SC", "TN", "TX", "RI", "IL");
generate lowineq  = inlist(stname, "UT", "NV", "VT", "ID", "NH", "NE", "IA")
                  | inlist(stname, "WI", "AK", "OR", "WY", "ME", "IN");

* Every state that is in neither list;
generate midineq = !(highineq | lowineq);

sort stfips;
save d:\data\nces\alldata.dta, replace;

* Attach the state-level file incstats_state.dta to the student records by
  stfips.  The state file is the master (one row per stfips) and the pooled
  student file is the using side (many rows per stfips);
use d:\data\nces\incstats_state.dta, clear;
sort stfips;

merge 1:m stfips using d:\data\nces\alldata.dta;
tabulate _merge;

* Only rows found in both files survive, so students whose stfips has no
  match in the state file are removed here;
keep if _merge == 3;
drop _merge;

sort stfips;
save d:\data\nces\alldata.dta, replace;

*Organize and merge on Chetty's mobility data and other data describing
characteristics of commuting zones;

* Commuting-zone mobility file: give the three mobility measures the names
  used in the rest of this program and park the result in temp1.dta, sorted
  by cz, for the merge that follows.
  NOTE(review): temp1.dta is not erased anywhere in the visible script;
use d:\data\nces\mobility_cz.dta, clear;

rename s_rank     relmobility;
rename e_rank_b   absmobility;
rename prob_p1_k5 probmoveup;

sort cz;
save d:\data\nces\temp1.dta, replace;

* Commuting-zone characteristics from chettydata_apptab8.dta: keep the
  variables of interest, give them shorter names, and combine them with the
  mobility measures saved in temp1.dta.  Names containing ~ are Stata
  abbreviations of longer variable names;
clear;
use d:\data\nces\chettydata_apptab8.dta;

keep ccd_exp_tot ccd_pup_tch_r~o crime_violent cs_born_foreign cs_divorced 
cs_elf_ind_man cs_fam_wkidsi~m cs_labforce cs_married cs_race_bla 
cs_race_th~2000 cs00_seg_inc~25 cs00_seg_inc~75 cz czname d_tradeusc~1990 
dropout_r eitc_exposure frac_middlecl~s frac_travelt~15 frac_worked1416 gini 
gini99 gradrate_r hhinc00 inc_share_1perc incgrowth0010 intersects_msa 
mig_inflow mig_outflow num_inst_pc scap_ski90pcm score_r state 
stateabbrv subcty_total_~c tax_st_diff_~20 taxrate tuition cs00_seg_inc ;

* gini and gini99 keep their original names, so they need no rename
  (renaming a variable to its own name accomplishes nothing);
rename  ccd_exp_tot schexpstud ;
rename  ccd_pup_tch_r~o studtchratio ;
rename  crime_violent violcrimerat ;
rename  cs_born_foreign pctforeign ;
rename  cs_divorced fracdivorced ;
rename  cs_elf_ind_man fracmanuf ;
rename  cs_fam_wkidsi~m fracsingpar ;
rename  cs_labforce lfpr ;
rename  cs_married fracmarrd ;
rename  cs_race_bla fracblack ;
rename  cs_race_th~2000 racesegind ;
rename  cs00_seg_inc~25 segpovind ;
rename  cs00_seg_inc~75 segaffind ;
rename  d_tradeusc~1990 grimpchina ;
rename  dropout_r hsdroprate ;
rename  eitc_exposure eitcexp ;
rename  frac_middlecl~s fracmidcls ;
rename  frac_travelt~15 commute15 ;
rename  frac_worked1416 lfpr1416 ;
rename  gradrate_r colgradrat ;
rename  hhinc00 hhinc ;
rename  inc_share_1perc incshare1pct ;
rename  incgrowth0010 incgrowth ;
rename  cs00_seg_inc incseg ;
rename  intersects_msa urban ;
rename  num_inst_pc numcolpc ;
rename  scap_ski90pcm soccapind ;
rename  score_r testscore ;
rename  subcty_total_~c localexpcap ;
rename  tax_st_diff_~20 sttaxprog ;
rename  taxrate loctaxrate ;
rename  tuition statetuition ;

* Explicit 1:1 merge, in the same syntax as the other merges in this file.
  It stops with an error if cz does not uniquely identify rows in either
  file, whereas the old syntax would pair such rows up silently.
  NOTE(review): assumes both files hold one row per commuting zone - confirm.
  Unmatched rows from either side are kept;
sort cz;
merge 1:1 cz using d:\data\nces\temp1.dta;
tab _merge;
drop _merge;

* Collapse the commuting-zone data to one row per state, taking means
  weighted by pop2000.
  NOTE(review): pop2000 is not among the variables kept from
  chettydata_apptab8.dta, so it presumably arrives with the merged mobility
  file - confirm;
sort state;
collapse (mean) relmobility absmobility probmoveup incseg schexpstud 
studtchratio violcrimerat pctforeign fracdivorced fracmanuf fracsingpar 
lfpr fracmarrd fracblack racesegind segpovind segaffind grimpchina hsdroprate 
eitcexp fracmidcls commute15 lfpr1416 gini gini99 colgradrat hhinc incshare1pct
incgrowth urban numcolpc soccapind testscore localexpcap sttaxprog 
loctaxrate statetuition [weight=pop2000], by(state);

* Variable labels are assigned after the collapse, on the state-level means;
label var schexpstud  " school expenditures per student ";
label var studtchratio  " student teacher ratio ";
label var violcrimerat  " violent crime rate ";
label var pctforeign  " percent foreign born ";
label var fracdivorced  " fraction divorced ";
label var fracmanuf  " fraction of workers in manufacturing ";
label var fracsingpar  " fraction single parents ";
label var lfpr  " labor force participation rate ";
label var fracmarrd  " fraction married ";
label var fracblack  " fraction black ";
label var racesegind  " racial segregation index ";
label var segpovind  " segregation of poverty index ";
label var segaffind  " segregation of affluence index ";
label var grimpchina  " growth rate in Chinese imports ";
label var hsdroprate  " HS dropout rate, residual ";
label var eitcexp  " eitc exposure ";
label var fracmidcls  " fraction of families middle class ";
label var commute15  " frac. workers with < 15 min commute ";
label var lfpr1416  " lfpr ages 14-16 ";
label var gini  " gini coefficient  ";
label var gini99  " gini coefficient dropping top 1% ";
label var colgradrat  " col graduate rate, residual ";
* Label typo corrected: it previously read per capital;
label var hhinc  " hh income per capita ";
label var incshare1pct  " income share top 1 percent ";
label var incgrowth  " income growth rate ";
label var incseg  " income segregation index ";
label var urban  " urban cz ";
label var numcolpc  " number of colleges per capita ";
label var soccapind  " social capital index ";
label var testscore  " std. test scores English ";
label var localexpcap  " local expenditures per capita ";
label var sttaxprog  " progressivity of state tax ";
label var loctaxrate  " average local tax rate ";
label var statetuition  " in-state tuition ";

* Print the states ordered by relmobility so the ranking appears in the log;
sort relmobility;
list state relmobility absmobility;

* Store the state-level file, sorted by state, for the crosswalk merge;
sort state;
save d:\data\nces\tempmob.dta, replace;

* Bring the state-level mobility file onto the student records.  The mobility
  file is keyed by the variable state, so it first goes through the crosswalk
  convstate.dta to pick up statefips and stname.  The crosswalk spells the
  District of Columbia as D.C., which is recoded before matching;
clear;
use d:\statprog\healtheduc\convstate.dta;
replace state = "District of Columbia" if state == "D.C.";
sort state;

* Explicit 1:1 merge, in the same syntax as the other merges in this file.
  tempmob.dta has one row per state because it is the output of a collapse by
  state.  NOTE(review): assumes state is also unique in convstate.dta;
merge 1:1 state using d:\data\nces\tempmob.dta;
tab _merge;

* Rows found only in tempmob.dta have no crosswalk entry and therefore no
  statefips.  Remove them so the 1:m merge on stfips below is never handed
  master rows with a missing key.  Crosswalk rows without mobility data are
  kept, as before;
drop if _merge == 2;
drop _merge;

rename statefips stfips;
keep stfips stname relmobility absmobility probmoveup incseg schexpstud 
studtchratio violcrimerat pctforeign fracdivorced fracmanuf fracsingpar 
lfpr fracmarrd fracblack racesegind segpovind segaffind grimpchina hsdroprate 
eitcexp fracmidcls commute15 lfpr1416 gini gini99 colgradrat hhinc incshare1pct
incgrowth urban numcolpc soccapind testscore localexpcap sttaxprog 
loctaxrate statetuition ;

* One state row to many student rows, and only matched rows are retained;
sort stfips;
merge 1:m stfips using d:\data\nces\alldata.dta;
tab _merge;

drop if _merge ~= 3;
drop _merge;

* State mobility groups as 0/1 dummies, built from the variables named amob
  and rmob (presumably absolute and relative mobility - confirm).  Each list
  has 13 states and is split over two inlist() calls because inlist() takes
  only a limited number of string arguments;
generate highamob = inlist(stname, "ND", "WY", "SD", "IA", "UT", "NE", "MT")
                  | inlist(stname, "MN", "ID", "KS", "NJ", "MA", "NH");
generate lowamob  = inlist(stname, "GA", "NC", "SC", "MS", "TN", "DE", "AL")
                  | inlist(stname, "MI", "OH", "MD", "VA", "FL", "KY");
generate highrmob = inlist(stname, "HI", "CA", "UT", "ID", "MT", "WY", "NV")
                  | inlist(stname, "ND", "AK", "WA", "OR", "CO", "SD");
generate lowrmob  = inlist(stname, "MS", "MD", "LA", "OH", "AL", "DE", "NC")
                  | inlist(stname, "SC", "IL", "VA", "IN", "TN", "MO");

* Middle groups: states that appear in neither the high nor the low list;
generate midrmob = !(highrmob | lowrmob);
generate midamob = !(highamob | lowamob);

* Save sorted by the keys of the policy merge, then delete the temporary
  state-level mobility file;
sort stname yrsoph;
save d:\data\nces\alldata.dta, replace;
erase d:\data\nces\tempmob.dta;

* Merge on policy variables from statedata.dta, matched on stname and yrsoph.
  The alternative state identifiers in that file are removed first, and its
  year variable is renamed to yrsoph to serve as the second merge key;
use d:\data\nces\statedata.dta, clear;

drop state stfips statefips statecps stateord statefip state_code state_name;
rename year yrsoph;

sort stname yrsoph;
merge 1:m stname yrsoph using d:\data\nces\alldata.dta;
tabulate _merge;

* Only matched rows are retained;
keep if _merge == 3;
drop _merge;

* Flag the rows where rmaxben3 is missing and then set those values to zero,
  so the flag records which zeros were filled in;
generate maxbenmis = (rmaxben3 == .);
replace rmaxben3 = 0 if maxbenmis == 1;

sort stname yrsoph;
save d:\data\nces\alldata.dta, replace;

* Merge on minwage.dta, matched on stname and yrsoph after renaming its year
  variable to yrsoph.  Only matched rows are retained;
use d:\data\nces\minwage.dta, clear;

drop state;
rename year yrsoph;

sort stname yrsoph;
merge 1:m stname yrsoph using d:\data\nces\alldata.dta;
tabulate _merge;

keep if _merge == 3;
drop _merge;

* The next merge is keyed on stfips rather than stname, so re-sort on that;
sort stfips yrsoph;
save d:\data\nces\alldata.dta, replace;

* Merge on educlaws.dta by stfips and yrsoph.  The law data are shifted by two
  years: a law row dated year is matched to students whose yrsoph equals
  year minus 2;
use d:\data\nces\educlaws.dta, clear;

generate yrsoph = year - 2;
drop year;

* Split the exit exam code into two dummies, one for each of codes 1 and 2;
generate exitexam1 = (hsexitexam == 1);
generate exitexam2 = (hsexitexam == 2);

label variable exitexam1 "HS exit exam law with easier test";
label variable exitexam2 "HS exit exam law with harder test";
label variable hsexitexam "Dee/Jacobs coding of HS exit exams";

sort stfips yrsoph;
merge 1:m stfips yrsoph using d:\data\nces\alldata.dta;

* Cross-tabulate the match status by yrsoph, then retain matched rows only;
tabulate yrsoph _merge;
keep if _merge == 3;
drop _merge;

sort stfips yrsoph;
save d:\data\nces\alldata.dta, replace;

log close;
