# delimit ;
* From here on every command, star comments included, ends at a semicolon.
  Close any log left open by an earlier run, then start a fresh one;
capture log close;
log using d:\statprog\dropout\inpnlsy1.log, replace;

**********************

INPNLSY1

**********************

This program inputs data from the NLSY capturing data on high school
dropout status of the respondents along with maternal educational
attainment and other individual characteristics.

It then merges in highest degree completed, county and state identifiers,
AFQT scores, state names and a county to MSA crosswalk, and saves the
result as nlsydata.dta;

* NOTE(review): set more 1 is a numeric form of set more - presumably meant
  to keep long output from pausing, confirm for the Stata version in use;
set more 1;
* Empty memory, then read the raw extract through its dictionary file;
clear;
infile using d:\data\nlsy\dropout\nlsy79-hsdrop.dct;

* Give the raw reference numbers from the dictionary working names.
  Year-specific items carry a two digit suffix - 79 through 85;

* identifier, background items and items suffixed 79;
rename R0000100  studentid;
rename R0000300  birthmo;
rename R0000500  birthyr;
rename R0000600  age79;
rename R0001900  wholiv14;
rename R0006500  hgcmtr;
rename R0007900  hgcftr;
rename R0015700  grdatt79;
rename R0018300  dipged79;
rename R0018400  dipged79mo;
rename R0018500  dipged79yr;
rename R0021400  yrcol79;
rename R0172700  race;
rename R0190310  faminc79;
rename R0214700  raceeth;
rename R0214800  sex;
rename R0216100  weight79;
rename R0216701  hgc79;
rename R0217502  famsize;

* items suffixed 80;
rename R0220200  age80;
rename R0228600  grdatt80;
rename R0230000  dipged80;
rename R0230100  dipged80mo;
rename R0230200  dipged80yr;
rename R0230600  yrcol80;
rename R0389900  havedipged80;
rename R0405200  weight80;
rename R0406401  hgc80;

* items suffixed 81;
rename R0410500  age81;
rename R0416900  grdatt81;
rename R0418200  dipged81;
rename R0418300  dipged81mo;
rename R0418400  dipged81yr;
rename R0418800  yrcol81;
rename R0533900  havedipged81;
rename R0614600  weight81;
rename R0618901  hgc81;

* items suffixed 82;
rename R0650100  age82;
rename R0664000  grdatt82;
rename R0665300  dipged82;
rename R0665400  dipged82mo;
rename R0665500  dipged82yr;
rename R0665900  yrcol82;
rename R0813500  havedipged82;
rename R0896700  weight82;
rename R0898201  hgc82;

* items suffixed 83;
rename R0900100  age83;
rename R0905400  grdatt83;
rename R0906700  dipged83;
rename R0906800  dipged83mo;
rename R0906900  dipged83yr;
rename R0907300  yrcol83;
rename R1048700  havedipged83;
rename R1144400  weight83;
rename R1145001  hgc83;

* items suffixed 84;
rename R1205300  grdatt84;
rename R1206600  dipged84;
rename R1206700  dipged84mo;
rename R1206800  dipged84yr;
rename R1207700  yrcol84;
rename R1432000  havedipged84;
rename R1519600  weight84;
rename R1520201  hgc84;
rename R1520310  age84;

* items suffixed 85;
rename R1604600  grdatt85;
rename R1605800  diploma85;
rename R1605900  dipged85;
rename R1606000  dipged85mo;
rename R1606100  dipged85yr;
rename R1606900  yrcol85;
rename R1865400  havedipged85;
rename R1890200  weight85;
rename R1890901  hgc85;
rename R1891010  age85;

* Every renamed variable, listed once so a single loop can recode them all;
local xvar
studentid birthmo birthyr age79 wholiv14 hgcmtr hgcftr
grdatt79 dipged79 dipged79mo dipged79yr yrcol79
race faminc79 raceeth sex weight79 hgc79 famsize
age80 grdatt80 dipged80 dipged80mo dipged80yr yrcol80 havedipged80 hgc80
age81 grdatt81 dipged81 dipged81mo dipged81yr yrcol81 havedipged81 hgc81
age82 grdatt82 dipged82 dipged82mo dipged82yr yrcol82 havedipged82 hgc82
age83 grdatt83 dipged83 dipged83mo dipged83yr yrcol83 havedipged83 hgc83
grdatt84 dipged84 dipged84mo dipged84yr yrcol84 havedipged84 hgc84 age84
grdatt85 diploma85 dipged85 dipged85mo dipged85yr yrcol85 havedipged85
hgc85 age85
weight80 weight81 weight82 weight83 weight84 weight85;

* Any negative value is turned into system missing.
  NOTE(review): presumably the negative values are survey nonresponse and
  skip codes - confirm against the codebook;
foreach v of local xvar {;
     replace `v' = . if `v' < 0;
};

 
* Keep only respondents who are observed at age 20, so that it is possible
  to tell whether they had completed HS or obtained a GED by that age;

* yrage20 is the calendar year in which the age reported in 1979 implies
  the respondent is 20;
gen yrage20 = 1979 - age79 + 20;

* Blank yrage20 when the round held in that same year has no age on file;
forvalues yy = 79/85 {;
     replace yrage20 = . if age`yy' == . & yrage20 == 19`yy';
};

keep if yrage20 ~= .;


* Highest grade completed and sampling weight as of age 20.
  Start from the 1979 values for respondents already past 20 in 1979,
  then take the values from any round in which the reported age is 20.
  Blank the result when the round that matches yrage20 has no value;
gen hgcage20 = hgc79    if age79 > 20;
gen wgtage20 = weight79 if age79 > 20;

forvalues yy = 79/85 {;
     replace hgcage20 = hgc`yy'    if age`yy' == 20;
     replace hgcage20 = .          if yrage20 == 19`yy' & hgc`yy' == .;

     replace wgtage20 = weight`yy' if age`yy' == 20;
     replace wgtage20 = .          if yrage20 == 19`yy' & weight`yy' == .;
};
	 
	
* Year the credential was received, taken from the first round that
  reports one.  dipged codes 1 and 3 are treated as a diploma and code 2
  as a GED.  The reported year is two digits, hence the 1900 offset;
gen yrdiploma = 1900 + dipged79yr if inlist(dipged79, 1, 3);
gen yrged     = 1900 + dipged79yr if dipged79 == 2;

* Later rounds only fill in a year that is still missing;
forvalues yy = 80/85 {;
     replace yrdiploma = 1900 + dipged`yy'yr
          if yrdiploma == . & inlist(dipged`yy', 1, 3);
     replace yrged = 1900 + dipged`yy'yr
          if yrged == . & dipged`yy' == 2;
};

* Credential status as of the year the respondent turns 20.  Each flag is
  1 or 0 whenever yrage20 is known and missing otherwise.  A missing award
  year counts as not yet earned;
gen diplomaby20 = (yrdiploma <= yrage20) if yrage20 ~= .;

* A GED counts only when no diploma was earned by 20, so the two flags
  are never both equal to 1;
gen gedby20 = ((yrged <= yrage20) & (diplomaby20 ~= 1)) if yrage20 ~= .;

* Neither credential by 20;
gen nodegreeby20 = ((gedby20 == 0) & (diplomaby20 == 0)) if yrage20 ~= .;

* String version of the three mutually exclusive outcomes;
gen educoutby20 = "hsgrad" if diplomaby20 == 1;
     replace educoutby20 = "ged" if gedby20 == 1;
     replace educoutby20 = "hsdrop" if nodegreeby20 == 1;

* Either credential by 20.  This one is 0 rather than missing when the
  component flags are missing;
gen dipgedby20 = (diplomaby20 == 1) | (gedby20 == 1);


* Parental schooling in three categories, built from highest grade
  completed - 11 or less, exactly 12, and 13 through 20.  Each dummy is
  missing when the grade itself is missing;
replace hgcmtr = . if hgcmtr < 0;

* mother;
gen mtrhsdrop = (hgcmtr <= 11)          if hgcmtr ~= .;
gen mtrhsgrad = (hgcmtr == 12)          if hgcmtr ~= .;
gen mtranycol = inrange(hgcmtr, 13, 20) if hgcmtr ~= .;

* father;
gen ftrhsdrop = (hgcftr <= 11)          if hgcftr ~= .;
gen ftrhsgrad = (hgcftr == 12)          if hgcftr ~= .;
gen ftranycol = inrange(hgcftr, 13, 20) if hgcftr ~= .;

* Demographic indicators.  All of these are 0 or 1 and never missing,
  because a missing source value simply fails the equality test.
  NOTE(review): the meaning of each raceeth, race, sex and wholiv14 code
  is taken from the variable names used here - confirm against the codebook;
gen hispanic = (raceeth == 1);
gen blacknh  = (race == 2) & !hispanic;
gen othernh  = (race == 3) & !hispanic;
gen whitenh  = (raceeth == 3);
gen female   = (sex == 2);
gen bothpar  = (wholiv14 == 11);

* NOTE(review): age is the 1979 age plus 6 - presumably age in 1985, confirm;
gen age = age79 + 6;
* 1979 weight divided by 100;
gen smpwgt = weight79/100;

* Sorted on the id so later match merges can use this file;
sort studentid;
save d:\data\nlsy\dropout\temp1.dta, replace;

* add in additional data on highest degree completed to get college grads;

clear;
infile using d:\data\nlsy\dropout\highdegree.dct;

* One highest degree item per round, suffixed with the two digit year;
rename R0000100  studentid;
rename R2509800  highdeg88;
rename R2909200  highdeg89;
rename R3111200  highdeg90;
rename R3511200  highdeg91;
rename R3711200  highdeg92;
rename R4138900  highdeg93;
rename R4527600  highdeg94;
rename R5222900  highdeg96;
rename R5822800  highdeg98;
rename R6541400  highdeg00;
rename R7104600  highdeg02;
rename R7811500  highdeg04;
rename T0015400  highdeg06;
rename T1215400  highdeg08;
rename T2273900  highdeg10;

local xvar2
highdeg88 highdeg89 highdeg90 highdeg91 highdeg92
highdeg93 highdeg94 highdeg96 highdeg98 highdeg00
highdeg02 highdeg04 highdeg06 highdeg08 highdeg10;

* Negative values become system missing;
foreach v of local xvar2 {;
     replace `v' = . if `v' < 0;
};


* College graduate indicator built from the highest degree item of every
  round from 1988 through 2010.

  colgradYY is 1 when the degree code reported in round YY is 3 or more.
  NOTE(review): assumes codes of 3 and above denote a bachelor degree or
  higher - confirm against the codebook.

  Two fixes relative to the earlier version of this step.  The yearly flags
  for 89 onward used to test highdeg88 instead of the degree of their own
  round, so a degree earned after 1988 was never picked up.  The 89 and 94
  flags were also left out when the yearly flags were combined.

  colgrad is 1 if any round shows a college degree, 0 if at least one round
  has a valid report and none shows a degree, and missing if no round has a
  valid report;
gen colgrad = 0;
gen anydegrpt = 0;

foreach yy in 88 89 90 91 92 93 94 96 98 00 02 04 06 08 10 {;
     gen colgrad`yy' = highdeg`yy' >= 3 & highdeg`yy' ~= .;
     replace colgrad = 1 if colgrad`yy' == 1;
     * track whether this respondent has a valid report in any round;
     replace anydegrpt = 1 if highdeg`yy' ~= .;
};

replace colgrad = . if anydegrpt == 0;

keep studentid colgrad;

* Attach colgrad to the main file.  This old-style match merge expects
  both files to be sorted on the key.  Only respondents found in both
  files are kept;
sort studentid;
merge studentid using d:\data\nlsy\dropout\temp1.dta;
tab _merge;
keep if _merge == 3;
drop _merge;

sort studentid;
save d:\data\nlsy\dropout\temp1.dta, replace;

* County and state codes named with a 14 suffix, read from the geocode
  file and matched to the main file on the respondent id;
clear;
infile using d:\data\nlsy79\geocode\survey_and_created_variables_032212.dct;
keep R0000100 R0001500 R0001600;

rename R0000100  studentid;
rename R0001500  cntyfips14;
rename R0001600  stfips14;

* Negative values become system missing;
foreach v in studentid cntyfips14 stfips14 {;
     replace `v' = . if `v' < 0;
};

* Only respondents found in both files are kept;
sort studentid;
merge studentid using d:\data\nlsy\dropout\temp1.dta;
tab _merge;
keep if _merge == 3;
drop _merge;

sort studentid;
save d:\data\nlsy\dropout\temp1.dta, replace;

* County and state codes named with a 79 suffix, read from the location
  file and matched to the main file on the respondent id;
clear;
infile using d:\data\nlsy79\geocode\location_032212.dct;

keep R0000100 R0219001 R0219002;

rename R0000100 studentid;
rename R0219001 cntyfips79;
rename R0219002 stfips79;

* Negative values become system missing, as in every other input step of
  this file.  Before this was added the 79 county and state codes were
  saved with any negative values still in place;
foreach v in studentid cntyfips79 stfips79 {;
     replace `v' = . if `v' < 0;
};

sort studentid;

* Only respondents found in both files are kept;
merge studentid using d:\data\nlsy\dropout\temp1.dta;
tab _merge;
drop if _merge ~= 3;
drop _merge;

* The codes suffixed 14 become the working state and county identifiers
  used by the later merges.  Respondents without a state code are dropped;
rename stfips14 stfips;
rename cntyfips14 cntyfips;
drop if stfips == .;

sort studentid;
save d:\data\nlsy\dropout\temp1.dta, replace;

* add in AFQT data;

clear;
infile using d:\data\nlsy\dropout\asvab79.dct;

rename R0000100  studentid;
rename R0618301  afqt;

* Negative values go to missing before the score is divided by 1000;
replace afqt = . if afqt < 0;
replace afqt = afqt/1000;

* Only respondents found in both files are kept;
sort studentid;
merge studentid using d:\data\nlsy\dropout\temp1.dta;

tab _merge;
keep if _merge == 3;
drop _merge;

* generate age adjusted AFQT scores.
  The score is regressed on dummies for age in 1979, and the adjusted
  score is the constant plus the residual.
  NOTE(review): dummies 1 and 5 are both left out of the regression, so
  the constant refers to those two age groups together - confirm that this
  is intended;

tabulate age79, generate(agedv);
regress afqt agedv2-agedv4 agedv6-agedv9;
predict resid, residuals;
gen afqtadj = resid + _b[_cons];

* Sorted on the state code for the state name merge that follows;
sort stfips;
save d:\data\nlsy\dropout\temp1.dta, replace;

* Attach state names.  The state file is the master here and the
  respondent file is the using file, matched on the state code;
clear;
use d:\data\nces\nels\convstate.dta;
sort statefips;
rename statefips stfips;
keep stfips stname;
merge stfips using d:\data\nlsy\dropout\temp1.dta;
tab _merge;
* Rows found only in the state file are states with no respondent.  They
  hold no respondent data, so drop them instead of letting them reach the
  final file as empty observations.  Respondents whose state code has no
  match in the state file are kept, with stname left empty;
drop if _merge == 1;
drop _merge;

* Sorted on state and county for the MSA crosswalk merge that follows;
sort stfips cntyfips;
save d:\data\nlsy\dropout\temp1.dta, replace;

* Attach MSA codes from the county crosswalk, matched on state and county;
clear;
use d:\data\nces\crosswalk-county-msa;

keep stfips cntyfips msa;
* msa is converted to numeric, and a code of 0 is treated as missing;
destring msa, replace;
replace msa = . if msa == 0;

sort stfips cntyfips;
merge stfips cntyfips using d:\data\nlsy\dropout\temp1.dta;
tab _merge;
* Drop crosswalk counties that matched no respondent.  Respondents
  without a crosswalk match are kept;
keep if _merge ~= 1;
drop _merge;

* Write the final analysis file, remove the work file and close the log;
save d:\data\nlsy\dropout\nlsydata.dta, replace;
erase d:\data\nlsy\dropout\temp1.dta;
log close;
