* From the next line on, statements end at a semicolon instead of at end of line
# delimit ;
* Close any log that is still open (capture ignores the error when none is),
  then start a new log file, overwriting the one from the previous run;
capture log close;
log using d:\statprog\dropout\inputall1-msa2.log, replace;

****************

INPUTALL1

****************
/*
This program inputs data from several datasets (NELS, ELS, NLSY79 and NLSY97)
to compile a large sample of high school students and follow them through
the process of dropping out or graduating from high school;

This program is the same as INPUTALL1 except that the inequality data is based
on MSAs, not states;
*/;

* NELS extract: first source file, starts the pooled file alldata-msa.dta;
* note: msa data for nels is from a follow up survey, not in base year;

use d:\data\nces\nels\dropout\nelsdata.dta, clear;

generate source = "nels";
generate yrsoph = 1990;

* smpwgt2 is the sampling weight divided by its mean within this source;
egen mnwgt = mean(smpwgt);
generate smpwgt2 = smpwgt / mnwgt;

* keep only the variables carried into the pooled file;
keep source studentid yrsoph
     female whitenh hispanic blacknh othernh age bothpar
     ftr* mtr*
     stname stfips msa
     smpwgt smpwgt2
     diplomaby20 gedby20 nodegreeby20 dipgedby20 educoutby20
     mathstdsc readstdsc histstdsc scistdsc
     colgrad;

tabulate source;

save d:\data\nces\alldata-msa.dta, replace;
clear;

* ELS extract: tag the source, rescale weights, then stack onto the pooled file;
use d:\data\nces\els\dropout\elsdata.dta;

generate source = "els";
generate yrsoph = 2002;

* smpwgt2 is the sampling weight divided by its mean within this source;
egen mnwgt = mean(smpwgt);
generate smpwgt2 = smpwgt / mnwgt;

* keep only the variables carried into the pooled file;
keep source studentid yrsoph
     female whitenh hispanic blacknh othernh age bothpar
     ftr* mtr*
     stname stfips msa
     smpwgt smpwgt2
     diplomaby20 gedby20 nodegreeby20 dipgedby20 educoutby20
     mathstdsc readstdsc;

tabulate source;

* add the records already pooled and write the combined file back;
append using d:\data\nces\alldata-msa.dta;
save d:\data\nces\alldata-msa.dta, replace;
clear;

* NLSY extract: tag the source, rescale weights, then stack onto the pooled file;
use d:\data\nlsy\dropout\nlsydata.dta;

generate source = "nlsy";
generate yrsoph = yrage20 - 4;

* smpwgt2 is the sampling weight divided by its mean within this source.
  The mean is taken before the stfips restriction below is applied;
egen mnwgt = mean(smpwgt);
generate smpwgt2 = smpwgt / mnwgt;

* keep state codes up to 56 only.
  NOTE(review): observations with a missing stfips are removed here as well,
  because a missing value compares greater than any number - confirm intended;
keep if stfips <= 56;

* keep only the variables carried into the pooled file;
keep source studentid yrsoph
     female whitenh hispanic blacknh othernh age bothpar
     ftr* mtr*
     stname stfips msa
     smpwgt smpwgt2
     diplomaby20 gedby20 nodegreeby20 dipgedby20 educoutby20
     hgcage20 colgrad;

tabulate source;

* add the records already pooled and write the combined file back;
append using d:\data\nces\alldata-msa.dta;
save d:\data\nces\alldata-msa.dta, replace;

* NLSY97 extract: tag the source, rescale weights, stack onto the pooled file,
  then build the mother's education category on the full pooled sample;
clear;
use d:\data\nlsy97\dropout\nlsydata.dta;

gen source = "nlsy97";
gen yrsoph = yrage20 - 4;

* smpwgt2 is the sampling weight divided by its mean within this source;
egen mnwgt = mean(smpwgt);
gen smpwgt2 = smpwgt/mnwgt;

keep source studentid yrsoph female whitenh hispanic blacknh othernh age 
     bothpar ftr* mtr* stname stfips smpwgt smpwgt2 diplomaby20 gedby20
	 nodegreeby20 dipgedby20 educoutby20 hgcage20 colgrad msa;

tab source;
 
append using d:\data\nces\alldata-msa.dta;

* write to the same pooled file as every other section. The file name used to
  be typed with the extension in the wrong place, which left a stray file on
  disk next to the real one;
save d:\data\nces\alldata-msa.dta, replace;
	 
* mother's education in three categories, taken from the mtr indicator
  variables. Later replace statements win when more than one indicator is 1,
  and the result is missing when none of them equals 1;
gen mtredcat = 1 if mtrhsdrop == 1;
     replace mtredcat = 2 if mtrhsgrad == 1;
	 replace mtredcat = 3 if mtranycol == 1;
	 
label define mtredlbl 1 "mtr hsdrop" 2 "mtr hsgrad" 3 "mtr anycol";
label values mtredcat mtredlbl;

* sorted by msa because the later match merges use msa as the key;
sort msa;
save d:\data\nces\alldata-msa.dta, replace;

* add on income inequality data by MSA;

* the inequality file is the master (one row per msa) and the pooled student
  file is the using data (many rows per msa);
use d:\data\nces\incstats_msa.dta, clear;
sort msa;

merge 1:m msa using d:\data\nces\alldata-msa.dta;
tabulate _merge;

* retain only observations whose msa appears in both files;
keep if _merge == 3;
drop _merge;

sort msa;
save d:\data\nces\alldata-msa.dta, replace;


* Organize and merge on mobility data;

* Step 1: cz-level mobility measures, renamed and stored in temp1.dta;
clear;
use d:\data\nces\mobility_cz.dta;

rename s_rank relmobility;
rename e_rank_b absmobility;
rename prob_p1_k5 probmoveup;

sort cz;
save d:\data\nces\temp1.dta, replace;

* Step 2: cz-level covariates. Keep the variables needed, give them short
  names, and merge the mobility measures from step 1 onto them;
clear;
use d:\data\nces\chettydata_apptab8.dta;

keep cz czname ccd_exp_tot ccd_pup_tch_r~o crime_violent cs_born_foreign 
cs_divorced cs_elf_ind_man cs_fam_wkidsi~m cs_labforce cs_married cs_race_bla 
cs_race_th~2000 cs00_seg_inc~25 cs00_seg_inc~75 d_tradeusc~1990 
dropout_r eitc_exposure frac_middlecl~s frac_travelt~15 frac_worked1416 gini 
gini99 gradrate_r hhinc00 inc_share_1perc incgrowth0010 intersects_msa 
mig_inflow mig_outflow num_inst_pc scap_ski90pcm score_r state 
stateabbrv subcty_total_~c tax_st_diff_~20 taxrate tuition cs00_seg_inc ;

* gini and gini99 already carry their final names, so they are not renamed.
  Renaming a variable to its own name does nothing useful and may be rejected
  by older Stata releases - TODO confirm on the release in use;
rename  ccd_exp_tot schexpstud ;
rename  ccd_pup_tch_r~o studtchratio ;
rename  crime_violent violcrimerat ;
rename  cs_born_foreign pctforeign ;
rename  cs_divorced fracdivorced ;
rename  cs_elf_ind_man fracmanuf ;
rename  cs_fam_wkidsi~m fracsingpar ;
rename  cs_labforce lfpr ;
rename  cs_married fracmarrd ;
rename  cs_race_bla fracblack ;
rename  cs_race_th~2000 racesegind ;
rename  cs00_seg_inc~25 segpovind ;
rename  cs00_seg_inc~75 segaffind ;
rename  d_tradeusc~1990 grimpchina ;
rename  dropout_r hsdroprate ;
rename  eitc_exposure eitcexp ;
rename  frac_middlecl~s fracmidcls ;
rename  frac_travelt~15 commute15 ;
rename  frac_worked1416 lfpr1416 ;
rename  gradrate_r colgradrat ;
rename  hhinc00 hhinc ;
rename  inc_share_1perc incshare1pct ;
rename  incgrowth0010 incgrowth ;
rename  cs00_seg_inc incseg ;
rename  intersects_msa urban ;
rename  num_inst_pc numcolpc ;
rename  scap_ski90pcm soccapind ;
rename  score_r testscore ;
rename  subcty_total_~c localexpcap ;
rename  tax_st_diff_~20 sttaxprog ;
rename  taxrate loctaxrate ;
rename  tuition statetuition ;

* old-style match merge on cz, which needs both files sorted by the key.
  NOTE(review): this form does not check that cz is unique in either file,
  and unmatched rows from both sides are kept;
sort cz;
merge cz using d:\data\nces\temp1.dta;
tab _merge;
drop _merge;

* temp1.dta now holds covariates and mobility measures together, by cz;
sort cz;
save d:\data\nces\temp1.dta, replace;

* Step 3: bring the cz-level data in temp1.dta down to counties through the
  cz-county crosswalk, as population-weighted means by county;
clear;
use d:\data\nces\crosswalk-cz-county;

sort cz;
merge cz using d:\data\nces\temp1.dta;
tab _merge;
drop _merge;

sort stfips cntyfips;
collapse (mean) relmobility absmobility probmoveup incseg schexpstud 
studtchratio violcrimerat pctforeign fracdivorced fracmanuf fracsingpar 
lfpr fracmarrd fracblack racesegind segpovind segaffind grimpchina hsdroprate 
eitcexp fracmidcls commute15 lfpr1416 gini gini99 colgradrat hhinc incshare1pct
incgrowth urban numcolpc soccapind testscore localexpcap sttaxprog 
loctaxrate statetuition [weight=pop1990], by(stfips cntyfips);
sort stfips cntyfips;

save d:\data\nces\temp2.dta, replace;

* Step 4: the collapse above does not keep pop1990, so rebuild county
  population from the crosswalk and attach the county means from temp2.dta;
clear;
use d:\data\nces\crosswalk-cz-county;

sort cz;
merge cz using d:\data\nces\temp1.dta;
tab _merge;
drop _merge;

sort stfips cntyfips;
collapse (sum) pop1990, by(stfips cntyfips);
sort stfips cntyfips;
merge stfips cntyfips using d:\data\nces\temp2.dta;
tab _merge;
drop _merge;
erase d:\data\nces\temp2.dta;

* Step 5: attach the msa code of each county and aggregate to msa level;
sort stfips cntyfips;
merge stfips cntyfips using d:\data\nces\crosswalk-county-msa;
tab _merge;
drop _merge;

destring msa, replace;
replace msa = . if msa == 0;

* NOTE(review): collapse keeps a separate group for msa == . and merge treats
  missing key values as equal to each other, so a row for counties without an
  msa code survives into the merge below - confirm this is intended;
sort msa;
collapse (mean) relmobility absmobility probmoveup incseg schexpstud 
studtchratio violcrimerat pctforeign fracdivorced fracmanuf fracsingpar 
lfpr fracmarrd fracblack racesegind segpovind segaffind grimpchina hsdroprate 
eitcexp fracmidcls commute15 lfpr1416 gini gini99 colgradrat hhinc incshare1pct
incgrowth urban numcolpc soccapind testscore localexpcap sttaxprog 
loctaxrate statetuition [weight=pop1990], 
     by(msa);

* Variable labels are assigned here, after the last collapse. collapse resets
  every label to the statistic and variable name, so labels assigned at the
  county stage never reached the final file;
label var schexpstud  " school expenditures per student ";
label var studtchratio  " student teacher ratio ";
label var violcrimerat  " violent crime rate ";
label var pctforeign  " percent foreign born ";
label var fracdivorced  " fraction divorced ";
label var fracmanuf  " fraction of workers in manufacturing ";
label var fracsingpar  " fraction single parents ";
label var lfpr  " labor force participation rate ";
label var fracmarrd  " fraction married ";
label var fracblack  " fraction black ";
label var racesegind  " racial segregation index ";
label var segpovind  " segregation of poverty index ";
label var segaffind  " segregation of affluence index ";
label var grimpchina  " growth rate in Chinese imports ";
label var hsdroprate  " HS dropout rate, residual ";
label var eitcexp  " eitc exposure ";
label var fracmidcls  " fraction of families middle class ";
label var commute15  " frac. workers with < 15 min commute ";
label var lfpr1416  " lfpr ages 14-16 ";
label var gini  " gini coefficient  ";
label var gini99  " gini coefficient dropping top 1% ";
label var colgradrat  " col graduate rate, residual ";
label var hhinc  " hh income per capita ";
label var incshare1pct  " income share top 1 percent ";
label var incgrowth  " income growth rate ";
label var incseg  " income segregation index ";
label var urban  " urban cz ";
label var numcolpc  " number of colleges per capita ";
label var soccapind  " social capital index ";
label var testscore  " std. test scores English ";
label var localexpcap  " local expenditures per capita ";
label var sttaxprog  " progressivity of state tax ";
label var loctaxrate  " average local tax rate ";
label var statetuition  " in-state tuition ";

* Step 6: merge the msa-level measures onto the pooled student file.
  NOTE(review): nothing is dropped on _merge here, so msa rows without
  students and students without msa-level measures both stay in the saved
  file - confirm this is intended;
sort msa;
merge msa using d:\data\nces\alldata-msa.dta;
tab _merge;
drop _merge;
	 
sort msa;
save d:\data\nces\alldata-msa.dta, replace;

log close;
