#delimit ;
/* ---------------------------------------------------------------------
   Session setup.
   Statements are terminated by a semicolon from here on.  Any log left
   open by an earlier run is closed (errors suppressed by capture), a
   fresh log is started, memory is emptied, the variable limit is
   raised, and the 1988-94 extract is loaded.
   --------------------------------------------------------------------- */
capture log close;
log using d:\statprog\dropout\inpnels1.log, replace;

clear;
clear mata;
clear matrix;
set maxvar 10000;

use d:\data\nces\nels\dropout\extract88_94.dta;

/* ---------------------------------------------------------------------
   Give the extract variables working names and attach a variable label
   to each one.  Every rename is immediately followed by its label so
   the two stay together.
   --------------------------------------------------------------------- */

/* identifiers and survey status */
rename STU_ID studentid;
label variable studentid "id number of student";
rename SCH_ID schoolid;
label variable schoolid "school's id number";
rename F3UNIV1 status;
label variable status "status of student in all years";
rename F3UNIV2A howenter;
label variable howenter "how student entered survey";
rename F3UNIV2B statusby;
label variable statusby "student's status in base year";
rename F3UNIV2C statusf1;
label variable statusf1 "student's status in first follow up";
rename F3UNIV2D statusf2;
label variable statusf2 "student's status in second follow up";
rename F3UNIV2E statusf3;
label variable statusf3 "student's status in third follow up";

/* family background */
rename BYS34A ftreduc;
label variable ftreduc "father's years of education";
rename BYS34B mtreduc;
label variable mtreduc "mother's years of education";
rename BYSES sesby;
label variable sesby "family SES in base year";
rename BYFAMINC famincby;
label variable famincby "family income in base year";
rename F1SES sesf1;
label variable sesf1 "family SES in first follow up";
rename F1SESQ sesqf1;
label variable sesqf1 "family SES quartile in first follow up";
rename F2BYFMSZ famsize;
label variable famsize "family size in base year";
rename F2BYPMAR parmarstat;
label variable parmarstat "parents' marital status in base year";
rename F2BYFCMP famcomp;
label variable famcomp "family composition in base year";

/* respondent demographics and interview date */
rename F2BIRTHM birthmo;
label variable birthmo "birth month";
rename F2BIRTHY birthyr;
label variable birthyr "birth year";
rename INT_MO intdatef3mo;
label variable intdatef3mo "interview date at 3rd follow up - month";
rename INT_YR intdatef3yr;
label variable intdatef3yr "interview date at 3rd follow up - year";
rename F3QSEX sex;
label variable sex "sex of respondent";
rename F3QRACE raceeth;
label variable raceeth "race and ethnicity of respondent";

/* diploma information and sampling weight */
rename YRRECM yrrecdipmo;
label variable yrrecdipmo "month received HS diploma";
rename YRRECY yrrecdipyr;
label variable yrrecdipyr "year received HS diploma";
rename F3DIPLOM dipged;
label variable dipged "received diploma or GED";
rename F3PNLWT smpwgt;
label variable smpwgt "sample weight";

/* ---------------------------------------------------------------------
   Demographic indicators and age at the third follow-up interview.
   The drop of zero-weight observations below is left disabled, as in
   the original script.
   --------------------------------------------------------------------- */
*drop if smpwgt == 0;
summarize studentid;

/* 0/1 indicators built from the sex and race/ethnicity codes.
   NOTE(review): a system-missing sex or raceeth yields 0 for the first
   four indicators and 1 for othernh - confirm the extract has no
   missing values in these two variables. */
gen female   = (sex == 2);
gen hispanic = (raceeth == 2);
gen blacknh  = (raceeth == 3);
gen whitenh  = (raceeth == 4);
gen othernh  = !(whitenh | blacknh | hispanic);

/* Age in whole years: days between the 15th of the birth month and the
   15th of the interview month, divided by 365.25 and rounded to the
   nearest integer.  Two-digit years are offset by 1900.  Negative
   results are set to missing. */
gen age94 = round((mdy(intdatef3mo, 15, 1900 + intdatef3yr)
                   - mdy(birthmo, 15, 1900 + birthyr)) / 365.25);
replace age94 = . if age94 < 0;

/* ---------------------------------------------------------------------
   Family background indicators.
   The four source variables are converted to numeric first.  Each
   indicator is 0/1 and is reset to missing when its source value is
   not usable.
   --------------------------------------------------------------------- */
destring famcomp mtreduc ftreduc famsize, replace;

/* Both parents present.  A relational test on a missing value returns
   0 in Stata, so a system-missing famcomp must be set to missing
   explicitly; the original only caught negative codes and left those
   cases coded 0. */
gen bothpar = famcomp == 1;
     replace bothpar = . if famcomp < 0 | missing(famcomp);

/* Mother's education.  The "> 7" tests also catch system-missing
   values because Stata orders missing above every number. */
gen mtrhsdrop = mtreduc == 1;
     replace mtrhsdrop = . if mtreduc > 7;
gen mtrhsgrad = mtreduc == 2;
     replace mtrhsgrad = . if mtreduc > 7;
gen mtranycol = mtreduc >= 3 & mtreduc <= 7;
     replace mtranycol = . if mtreduc > 7;

/* Father's education, same coding as for the mother. */
gen ftrhsdrop = ftreduc == 1;
     replace ftrhsdrop = . if ftreduc > 7;
gen ftrhsgrad = ftreduc == 2;
     replace ftrhsgrad = . if ftreduc > 7;
gen ftranycol = ftreduc >= 3 & ftreduc <= 7;
     replace ftranycol = . if ftreduc > 7;

/* Family sizes above 10 are treated as not usable. */
replace famsize = . if famsize > 10;

/* ---------------------------------------------------------------------
   High-school completion outcomes derived from dipged, then save the
   student-level working file sorted by studentid.

   Stata orders missing values above every number, so "dipged >= 4" is
   true when dipged is missing.  The original therefore coded students
   with a missing dipged as nodegreeby20 == 1 and educoutby20 ==
   "hsdrop".  All three indicators are now set to missing when dipged is
   negative or missing, so they agree with each other and with
   dipgedby20.
   NOTE(review): if the source also uses large positive reserve codes
   for missing data, those are still counted as no degree - confirm
   against the codebook.
   --------------------------------------------------------------------- */
gen diplomaby20 = dipged == 1;
     replace diplomaby20 = . if dipged < 0 | missing(dipged);
gen gedby20 = dipged == 2 | dipged == 3;
     replace gedby20 = . if dipged < 0 | missing(dipged);
gen nodegreeby20 = dipged >= 4 & !missing(dipged);
     replace nodegreeby20 = . if dipged < 0 | missing(dipged);

/* String summary of the three indicators; left empty when none is 1. */
gen  educoutby20 = "hsdrop" if nodegreeby20 == 1;
     replace educoutby20 = "ged" if gedby20 == 1;
     replace educoutby20 = "hsgrad" if diplomaby20 == 1;

/* Diploma or GED combined; missing when either component is missing. */
gen dipgedby20 = 0;
     replace dipgedby20 = 1 if diplomaby20 == 1 | gedby20 == 1;
     replace dipgedby20 = . if diplomaby20 == . | gedby20 == .;

sort studentid;
save d:\data\nces\nels\temp1.dta, replace;

/* ---------------------------------------------------------------------
   College completion.
   Loads the six F4EDGR variables, flags a student as a college graduate
   when any of them lies in the range 3 to 6, and merges the flag onto
   the working file by studentid.  Only students found in both files
   are kept.
   --------------------------------------------------------------------- */
use D:\Data\nces\nels\dropout\NELS_88_00_BYF4STU_V1_0.dta, clear;
keep STU_ID F4EDGR1 F4EDGR2 F4EDGR3 F4EDGR4 F4EDGR5 F4EDGR6;

rename STU_ID studentid;
forvalues i = 1/6 {;
    rename F4EDGR`i' degree`i';
};

/* inrange() returns 0 for a missing argument, matching the original
   paired >= and <= comparison. */
gen colgrad = inrange(degree1, 3, 6);
forvalues i = 2/6 {;
    replace colgrad = 1 if inrange(degree`i', 3, 6);
};

keep studentid colgrad;
sort studentid;
merge studentid using d:\data\nces\nels\temp1.dta;
tabulate _merge;
keep if _merge == 3;
drop _merge;

sort studentid;
save d:\data\nces\nels\temp1.dta, replace;

/* ---------------------------------------------------------------------
   Test scores.
   Loads the four standardized scores, drops every record in which any
   score exceeds 80 (this also drops missing scores, since Stata orders
   missing above every number), and merges onto the working file by
   studentid.
   NOTE(review): unlike the other merges in this script, no filter on
   _merge is applied here, so unmatched records from either side are
   kept - confirm this is intended.
   --------------------------------------------------------------------- */
use D:\Data\nces\nels\NELS_88_94_STMEG3_V1_0.dta, clear;
keep STU_ID BY2XMSTD BY2XHSTD BY2XRSTD BY2XSSTD;

rename STU_ID studentid;

rename BY2XMSTD mathstdsc;
label variable mathstdsc "standardized score on math test";
rename BY2XRSTD readstdsc;
label variable readstdsc "standardized score on read test";
rename BY2XHSTD histstdsc;
label variable histstdsc "standardized score on hist test";
rename BY2XSSTD scistdsc;
label variable scistdsc "standardized score on sci test";

destring mathstdsc readstdsc histstdsc scistdsc, replace;
foreach score of varlist mathstdsc readstdsc histstdsc scistdsc {;
    drop if `score' > 80;
};

sort studentid;
merge studentid using d:\data\nces\nels\temp1.dta;
tabulate _merge;
drop _merge;

sort studentid;
save d:\data\nces\nels\temp1.dta, replace;


/* ---------------------------------------------------------------------
   Geography.
   Reads the geographic records through a dictionary file, discards
   records whose stname is "99", and matches them to the state
   conversion table by stname.  The result is then merged onto the
   working file by studentid, keeping only students found in both, and
   saved sorted by zipf4 ready for the zip-code crosswalk.
   --------------------------------------------------------------------- */
clear;
infile using d:\data\nces\nels\dropout\inpnelsgeo.dct;
drop if stname == "99";

sort stname;
save d:\data\nces\nels\temp2.dta, replace;

use d:\data\nces\nels\convstate.dta, clear;
sort stname;
merge stname using d:\data\nces\nels\temp2.dta;
tabulate _merge;
drop _merge;

/* Keep only the variables needed downstream, then give them their
   working names. */
keep studentid stname statefips zip4follow;
rename statefips stfips;
rename zip4follow zipf4;
label variable zipf4 "zip code in fourth follow up (2000)";

sort studentid;
merge studentid using d:\data\nces\nels\temp1.dta;
tabulate _merge;
keep if _merge == 3;
drop _merge;

sort zipf4;
save d:\data\nces\nels\temp1.dta, replace;

/* ---------------------------------------------------------------------
   Zip-to-county crosswalk.
   Renames the crosswalk keys with an f4 suffix, merges the working file
   onto the crosswalk by zipf4, and discards crosswalk rows that matched
   no student.  The result is saved sorted by state and county codes.
   --------------------------------------------------------------------- */
use d:\data\nces\crosswalk-zip-county.dta, clear;

rename stfips stfipsf4;
rename cntyfips cntyfipsf4;
rename zip zipf4;

sort zipf4;
merge zipf4 using d:\data\nces\nels\temp1.dta;
tabulate _merge;
keep if _merge != 1;
drop _merge;

sort stfipsf4 cntyfipsf4;
save d:\data\nces\nels\temp1.dta, replace;

/* ---------------------------------------------------------------------
   County-to-MSA crosswalk and final output.
   Attaches msa to the working file by state and county codes, discards
   crosswalk rows that matched no student, writes the final dataset,
   removes the temporary files, and closes the log.
   --------------------------------------------------------------------- */
use d:\data\nces\crosswalk-county-msa, clear;

keep stfips cntyfips msa;
rename stfips stfipsf4;
rename cntyfips cntyfipsf4;

/* An msa value of 0 is recoded to missing. */
destring msa, replace;
replace msa = . if msa == 0;

sort stfipsf4 cntyfipsf4;
merge stfipsf4 cntyfipsf4 using d:\data\nces\nels\temp1.dta;
tabulate _merge;
keep if _merge != 1;
drop _merge;

save d:\data\nces\nels\dropout\nelsdata.dta, replace;
erase d:\data\nces\nels\temp1.dta;
erase d:\data\nces\nels\temp2.dta;

log close;


