# delimit ;
* Semicolon is the command terminator for the rest of this file, so star-comments also run until a semicolon;
* Close any log that is still open (errors suppressed), then start a new log that overwrites the previous one;
capture log close;
log using d:\statprog\dropout\inpnlsy971.log, replace;

**********************

INPNLSY971

**********************

This program reads data from the NLSY 1997 cohort on the high school dropout status of the respondents, together with
parental educational attainment and other individual characteristics;

* NOTE(review): set more 1 looks like the legacy spelling of set more off - confirm on the Stata version in use;
set more 1;
* Empty memory, then read the main extract as laid out by its dictionary file;
clear;
infile using d:\data\nlsy97\dropout\nlsy97_dropout.dct;

* Raw variables whose negative values are recoded to missing by the loop below.
  S1598200 (renamed weight2002 further down) is listed so that the 2002 weight is
  cleaned in the same way as the weight of every other round;
local xvar 
R0000100 R0536300 R0536401 R0536402 R0554500 R0554800 R1194100 R1204500 
R1204900 R1205000 R1205300 R1205400 R1235800 R1236201 R1482600 R2553500 
R2564001 R3876300 R3885601 R5453700 R5464801 R7216000 R7228501 S1531400 
S1542401 S2001000 S2012201 S3801100 S3813701 S5401000 S5412600 S5413300 
R1204400 R2563101 R3884701 R5463901 R7227601 S1541501 S2011301 S3812201
R2600401 R3958600 R5510700 R7274300 S1598200 S2067100 S3861700 S5444300;

* Replace every negative value with system missing, one variable at a time;
foreach x of local xvar
     {;
     replace `x' = . if `x' < 0;
	 };

* Give each raw variable its working name.  Every quoted item is one
  old-name new-name pair that is handed to rename unchanged;
foreach pair in
     "R0000100 studentid"
     "R0536300 sex"
     "R0536401 birthmo"
     "R0536402 birthyr"
     "R0554500 hgcmtr"
     "R0554800 hgcftr"
     "R1194100 age1997"
     "R1204400 hgc1997"
     "R1204500 faminc97"
     "R1204900 povratio97"
     "R1205000 wholiv12"
     "R1205300 relhead97"
     "R1205400 famsize"
     "R1236201 weight1997"
     "R1482600 raceeth"
     "R2553500 age1998"
     "R2563101 hgc1998"
     "R2564001 hdc1998"
     "R2600401 weight1998"
     "R3876300 age1999"
     "R3884701 hgc1999"
     "R3885601 hdc1999"
     "R3958600 weight1999"
     "R5453700 age2000"
     "R5463901 hgc2000"
     "R5464801 hdc2000"
     "R5510700 weight2000"
     "R7216000 age2001"
     "R7227601 hgc2001"
     "R7228501 hdc2001"
     "R7274300 weight2001"
     "S1531400 age2002"
     "S1542401 hdc2002"
     "S1541501 hgc2002"
     "S1598200 weight2002"
     "S2001000 age2003"
     "S2011301 hgc2003"
     "S2012201 hdc2003"
     "S2067100 weight2003"
     "S3801100 age2004"
     "S3812201 hgc2004"
     "S3813701 hdc2004"
     "S3861700 weight2004"
     "S5401000 age2005"
     "S5412600 hgc2005"
     "S5413300 hdc2005"
     "S5444300 weight2005"
     {;
     rename `pair';
     };

* Variable labels for the identifier and the items that are recorded once;
label variable studentid  "PUBID - YTH ID CODE 1997";
label variable sex        "KEY!SEX (SYMBOL) 1997";
label variable birthmo    "KEY!BDATE M/Y (SYMBOL) 1997";
label variable birthyr    "KEY!BDATE M/Y (SYMBOL) 1997";
label variable hgcmtr     "HGC BY PR MOTH? 1997";
label variable hgcftr     "HGC BY PR FATH? 1997";
label variable faminc97   "CV_INCOME_GROSS_YR 1997";
label variable povratio97 "CV_HH_POV_RATIO 1997";
label variable wholiv12   "CV_YTH_REL_HH_AGE_12 1997";
label variable relhead97  "CV_YTH_REL_HH_CURRENT 1997";
label variable famsize    "CV_HH_SIZE 1997";
label variable raceeth    "KEY!RACE_ETHNICITY (SYMBOL) 1997";

* Age at interview carries the same label stem in every round;
forvalues yr = 1997/2005 {;
     label variable age`yr' "CV_AGE_INT_DATE `yr'";
     };

* Highest grade and highest degree use the EDT label stem for 1998 through 2004 only;
forvalues yr = 1998/2004 {;
     label variable hgc`yr' "CV_HGC_EVER_EDT `yr'";
     label variable hdc`yr' "CV_HIGHEST_DEGREE_EVER_EDT `yr'";
     };

* The first and last rounds use the label stem without EDT;
label variable hgc1997 "CV_HGC_EVER 1997";
label variable hgc2005 "CV_HGC_EVER 2005";
label variable hdc2005 "CV_HIGHEST_DEGREE_EVER 2005";
                   
* Survey year in which the respondent is 20, computed as 1997 minus age at the 1997 interview plus 20;
gen yrage20 = 1997 - age1997 + 20;

* Set yrage20 to missing when the age variable of that same survey year is missing;
forvalues i = 1997(1)2005 {;
     replace yrage20 = . if yrage20 == `i' & age`i' == .;
	 };

* Respondents without a usable yrage20 leave the sample here;
drop if yrage20 == .;

* Highest grade completed, taken from the round whose year equals yrage20.
  It stays missing for any yrage20 outside 1999 through 2005;
gen hgcage20 = .;
forvalues i = 1999(1)2005 {;
  	 replace hgcage20 = hgc`i' if yrage20 == `i';
	 };

* Sampling weight from the round whose year equals yrage20.  A missing weight is copied as missing
  by the assignment itself, so no separate reset is required.  The reset that used to follow compared
  yrage20 with the year prefixed by 19 (for example 191999) and therefore could never match;
gen wgtage20 = .;
forvalues i = 1999(1)2005 {;
  	 replace wgtage20 = weight`i' if yrage20 == `i';
	 };
	 
	
	* First survey year, 1998 through 2005, in which the highest-degree variable equals 1 (kept in yrged)
	  or equals 2 (kept in yrdiploma).  Each stays missing when its code is never observed;
gen yrged = .;
gen yrdiploma = .;
forvalues yr = 1998/2005 {;
     replace yrged = `yr' if hdc`yr' == 1 & yrged == .;
     replace yrdiploma = `yr' if hdc`yr' == 2 & yrdiploma == .;
     };

* Indicators of holding each credential by yrage20: 1 when the first observed year is no later than
  yrage20, 0 otherwise (a never-observed credential counts as 0), missing only when yrage20 is missing;
gen diplomaby20 = (yrdiploma <= yrage20) if yrage20 ~= .;
gen gedby20 = (yrged <= yrage20) if yrage20 ~= .;

* 1 when neither credential is held by yrage20, 0 when at least one is;
gen nodegreeby20 = (gedby20 == 0 & diplomaby20 == 0) if yrage20 ~= .;

* Three-way string outcome.  A diploma takes precedence over a GED when both are held;
gen str6 educoutby20 = cond(diplomaby20 == 1, "hsgrad",
                       cond(gedby20 == 1, "ged",
                       cond(nodegreeby20 == 1, "hsdrop", "")));

* 1 when either credential is held by yrage20, 0 in every other case;
gen dipgedby20 = (gedby20 == 1 | diplomaby20 == 1);

label variable dipgedby20   "received a HS diploma or a GED by age 20";
label variable diplomaby20  "received a HS diploma by age 20";
label variable gedby20      "received a GED by age 20";
label variable nodegreeby20 "neither a HS diploma or a GED by age 20";
label variable educoutby20  "educational outcome by age 20";

* Parental schooling dummies built from hgcmtr (mother) and hgcftr (father).
  Every dummy is missing when the schooling code of that parent is missing.
  NOTE(review): the cut points 2, 3 and 4 suggest these variables hold category codes
  rather than years of schooling - confirm against the codebook;
replace hgcmtr = . if hgcmtr < 0;
foreach par in mtr ftr {;
     gen `par'hsdrop = (hgc`par' <= 2) if hgc`par' ~= .;
     gen `par'hsgrad = (hgc`par' == 3) if hgc`par' ~= .;
     gen `par'anycol = (hgc`par' >= 4) if hgc`par' ~= .;
     };

* Dummies from raceeth.  The variable name is written out in full on every line so that the
  code does not depend on variable-name abbreviation being enabled and unambiguous.
  Each dummy is 0, not missing, when raceeth is missing;
gen hispanic = raceeth == 2;
gen blacknh = raceeth == 1;
gen othernh = raceeth == 3;
gen whitenh = raceeth == 4; 

* Dummies for sex code 2 and for wholiv12 code 1, again 0 when the source value is missing;
gen female = sex == 2;
gen bothpar = wholiv12 == 1;

* Age at the 1997 interview becomes the generic age variable, and the 1997 weight is rescaled by 100;
rename age1997 age;
gen smpwgt = weight1997/100;

* Sort on the respondent id and store the working file used by the later merges;
sort studentid ;
save d:\data\nlsy97\dropout\temp1.dta, replace;

* read in data necessary to create college graduate DV;

* Start from an empty dataset and read the highest-degree extract through its dictionary;
clear;
infile using d:\data\nlsy97\dropout\highdeg.dct;

* Variables of this extract whose negative values are turned into missing;
local xvar2
     R0000100 R0536300 R0536401 R0536402 R1235800 R1482600
     S2261100 S4032600 S5613000 S7683300
     T0149600 T2120000 T3731100 T5322200 T6650500 T6767100;

foreach v of local xvar2 {;
     replace `v' = . if `v' < 0;
     };

* Working names for the identifier and the yearly highest-degree items.
  Every quoted item is one old-name new-name pair handed to rename unchanged;
foreach pair in
     "R0000100 studentid"
     "S2261100 hdc2003"
     "S4032600 hdc2004"
     "S5613000 hdc2005"
     "S7683300 hdc2006"
     "T0149600 hdc2007"
     "T2120000 hdc2008"
     "T3731100 hdc2009"
     "T5322200 hdc2010"
     "T6767100 hdc2011"
     {;
     rename `pair';
     };

* Labels: the identifier, then one label per survey year with the year appended;
label variable studentid "PUBID - YTH ID CODE 1997";
forvalues yr = 2003/2011 {;
     label variable hdc`yr' "HIGHEST DEGREE EVER RECEIVED `yr'";
     };


* colgrad follows the latest round, 2003 through 2011, that has a nonmissing highest-degree value:
  1 when that value is 5 or more and 0 when it is below 5.  It stays missing when every round is missing.
  NOTE(review): confirm in the codebook that codes of 5 and above are the intended college degrees;
gen colgrad = .;
forvalues yr = 2003/2011 {;
     replace colgrad = (hdc`yr' >= 5) if hdc`yr' ~= .;
     };

* Join with the working file on the respondent id and retain matched respondents only;
sort studentid ;
merge studentid using d:\data\nlsy97\dropout\temp1.dta;
tabulate _merge;
drop if _merge ~= 3;
drop _merge;
sort studentid ;
save d:\data\nlsy97\dropout\temp1.dta, replace;

* add in state data;
	 
* Load only the needed geocode variables, then name the 1997 state and county codes as FIPS keys;
clear;
use studentid sex birthmo birthyr state1997 county1997
     using d:\data\nlsy97\dropout\geocodedata.dta;
rename state1997 stfips;
rename county1997 cntyfips;

* Join with the working file on the respondent id.  Matched respondents with a
  nonmissing state code are the only ones retained;
sort studentid ;
merge studentid using d:\data\nlsy97\dropout\temp1.dta;
tabulate _merge;
keep if _merge == 3;
drop _merge;
keep if stfips ~= .;

sort studentid;
save d:\data\nlsy97\dropout\temp1.dta, replace;

* add in AFQT data;

* Read the AFQT extract through its dictionary file;
clear;
infile using d:\data\nlsy97\dropout\afqt97.dct;

rename R0000100 studentid;
rename R9829600 afqt;
* Negative scores become missing, and the remaining scores are divided by 1000;
replace afqt = . if afqt < 0;
replace afqt = afqt/1000;

sort studentid;
merge studentid using d:\data\nlsy97\dropout\temp1.dta;

tab _merge;
* Discard AFQT records with no counterpart in the working file.  Without this step, respondents
  removed by the earlier sample restrictions would come back carrying nothing but an id and a score.
  Working-file respondents without an AFQT record are retained with a missing score;
drop if _merge == 1;
drop _merge;

* generate age adjusted AFQT scores;

* One dummy per distinct value of age.  The adjusted score is the regression intercept plus the residual.
  NOTE(review): both agedv1 and agedv4 are left out of the regression, so the omitted group pools
  two age values - confirm that this is intended;
tab age, gen(agedv);
regress afqt agedv2-agedv3 agedv5-agedv7;
predict resid, resid;
gen afqtadj = _b[_cons]+resid;

* Sorted on the state code in preparation for the state-name merge;
sort stfips;
save d:\data\nlsy97\dropout\temp1.dta, replace;

* State lookup table: rename its key to stfips and keep only the key and the state name;
clear;
use d:\data\nces\nels\convstate.dta;
rename statefips stfips;
sort stfips;
keep stfips stname;

* Attach state names to the working file.  Only records found in both files are retained;
merge stfips using d:\data\nlsy97\dropout\temp1.dta;
tabulate _merge;
drop if _merge ~= 3;
drop _merge;

* Sorted on state and county in preparation for the county-to-MSA merge;
sort stfips cntyfips;
save d:\data\nlsy97\dropout\temp1.dta, replace;

* County-to-MSA crosswalk: keep the keys and the msa code, make msa numeric, and treat code 0 as missing;
clear;
use d:\data\nces\crosswalk-county-msa;

keep stfips cntyfips msa;
destring msa, replace;
replace msa = . if msa == 0;

* Attach the msa code to the working file.  Crosswalk rows without a matching
  respondent are discarded, and respondents without a crosswalk row are retained;
sort stfips cntyfips;
merge stfips cntyfips using d:\data\nlsy97\dropout\temp1.dta;
tabulate _merge;
keep if _merge ~= 1;
drop _merge;

* Store the final analysis file, delete the temporary working file, and close the log;
sort studentid;
save d:\data\nlsy97\dropout\nlsydata.dta, replace;
erase d:\data\nlsy97\dropout\temp1.dta;
log close;
