* ipums.do
* Clean IPUMS data for use in wage regressions
* Edward L. Glaeser (eglaeser@harvard.edu) and Joshua D. Gottlieb (jdgottl@fas.harvard.edu)

* Work from the IPUMS project directory
cd ~/urban/ipums/

* Close any log still open under this name (ignore the error if there is none)
capture log close ipums

* now is not a built-in Stata command; presumably a user-written program
* that leaves the current date and time in r(date) and r(time).
* They are used to give each run's log file its own name.
now
local date = r(date)
local time = r(time)
log using "~/urban/logs/ipums_`date'_`time'.log", text replace name(ipums)
clear

set more off
set mem 10g

* MSA definition to use; selects the definitions do-file run for each year
local def "99pmsa"

* Value labels for the indicator variables created below
label define malelbl 0 "Female" 1 "Male"
label define binary 0 "No" 1 "Yes"

* Suffix appended to the input and output file names.
* For 0.1% sample:
* local s = "_001"
* For complete sample:
local s ""

* Build one cleaned person-level file per census year.
* The extract codes the year variable as 97, 98, 99 or 0; the year and yr
* locals hold the four- and two-digit forms used in labels and file names.
* Each pass saves two files: IPUMSfull (all retained workers) and
* IPUMSprepped (metro observations only).
foreach censusyear in 97 98 99 0 {

	local year = "2000"
	local yr = "00"

	if `censusyear' == 97 {
		local year = "1970"
		local yr = "70"
	}

	if `censusyear' == 98 {
		local year = "1980"
		local yr = "80"
	}

	if `censusyear' == 99 {
		local year = "1990"
		local yr = "90"
	}

	* Load only this census year's rows instead of loading the whole
	* extract and then dropping the other years
	use if year == `censusyear' using "~/bulk/ipums/jdgottl_fas_harvard_edu_010`s'.dta", clear

	* Loading a dataset replaces everything in memory, including value
	* labels, so the labels applied below must be (re)defined after the load
	lab def malelbl 0 "Female" 1 "Male", replace
	lab def binary 0 "No" 1 "Yes", replace

	if ("`def'" == "99pmsa") do defs99pmsa

	ren metaread msa
	sort msa

	if ("`def'" == "99pmsa") lab values migmet5 metareadlbl

	** Migration status relative to the MSA of residence 5 years ago

	* NOTE(review): a missing migmet5 satisfies both msa != migmet5 and
	* migmet5 != 0 (missing compares greater than any number), so such
	* rows get diffmsa = 1 -- confirm this is intended
	gen samemsa = 1 if msa == migmet5 & msa != 0
	gen diffmsa = 1 if msa != migmet5 & migmet5 != 0
	gen nomsa = 1 if migmet5 == 0
	* Fill in the zeros only for people currently in an MSA; the indicators
	* stay missing where msa == 0
	foreach var of varlist samemsa nomsa diffmsa {
		recode `var' (.=0) (1=1) if msa != 0
		lab val `var' binary
	}
	lab var samemsa "In same MSA 5 years ago"
	lab var diffmsa "In different MSA 5 years ago"
	lab var nomsa "Not in MSA 5 years ago"

	** Only want people who worked last year, are still in the labor force, are 18 or older, and are currently working

	* Require yearly earnings of at least half a full-time, full-year
	* minimum-wage income (minimum wage * 40 hours * 52 weeks / 2):
	/* 2000: 5.15 * 40 * 52 = 10712; half = 5356 */
	/* 1990: 3.80 * 40 * 52 =  7904; half = 3952 */
	/* 1980: 3.10 * 40 * 52 =  6448; half = 3224 */
	/* 1970: 1.60 * 40 * 52 =  3328; half = 1664 */

	if (`year' == 2000) keep if incwage >= 5356
	if (`year' == 1990) keep if incwage >= 3952 
	if (`year' == 1980) keep if incwage >= 3224 
	if (`year' == 1970) keep if incwage >= 1664 

	keep if workedyr == 2
	keep if empstatd == 10 | empstatd == 14
	keep if age >= 18

	drop workedyr
	drop empstatd

	** Create dummies for regressions

	* sex arrives coded 1/2; turn it into a 0/1 male indicator
	recode sex (2=0)
	rename sex male
	lab val male malelbl
	lab var male "Male"

	gen pm = 0
	replace pm = 1 if age <= 55 & age >= 25 & male == 1
	lab var pm "Prime-age male"

	* Age-decade indicators; ages below 30 are the omitted group
	recode age (30/39=1) (else=0), gen(age30s)
	lab var age30s "Age in 30s"

	recode age (40/49=1) (else=0), gen(age40s)
	lab var age40s "Age in 40s"

	recode age (50/59=1) (else=0), gen(age50s)
	lab var age50s "Age in 50s"

	recode age (60/69=1) (else=0), gen(age60s)
	lab var age60s "Age in 60s"

	recode age (70/126=1) (else=0), gen(agegt70)
	lab var agegt70 "Age 70 or above"

	drop age

	* Race indicators; every race code not listed in any of the three
	* recodes is the omitted group
	recode race (2=1) (else=0), gen(black)
	lab var black "Individual is black"

	recode race (4=1) (5=1) (6=1) (else=0), gen(asian)
	lab var asian "Individual is Asian"

	recode race (3=1) (7=1) (8=1) (9=1) (else=0), gen(other)
	lab var other "Individual is other race"

	drop race

	* Education indicators; college graduates (code 9) also count as
	* high school graduates (codes 7-9)
	recode educrec (7/9=1) (else=0), gen(hsdiploma)
	lab var hsdiploma "High school graduate"

	recode educrec (9=1) (else=0), gen(college)
	lab var college "College graduate"

	drop educrec

	* Code 0 becomes missing, code 1 becomes 0, codes 2-6 become 1
	recode speakeng (0=.) (1=0) (2/6=1), gen(english)
	lab var english "Speaks English"

	drop speakeng

	* NOTE(review): codes 3-6 are set to missing rather than 0, so amcit
	* is never 0 and any estimation using it drops those observations --
	* confirm against the IPUMS CITIZEN codebook that this is intended
	recode citizen (0/2=1) (3/6=.), gen(amcit)
	lab var amcit "American citizen"

	drop citizen

	foreach var of varlist pm-amcit {
		lab values `var' binary
	}

	* Interactions of each dummy with a year-2000 indicator: equal to the
	* dummy in the 2000 data, 0 in other years, missing where the dummy is
	* missing. The label is built from the base variable's label.
	foreach var of varlist male age30s-amcit {
		gen `var'2000 = `var' if year == 0
		replace `var'2000 = 0 if mi(`var'2000) & !mi(`var')
		local varlab: var lab `var'
		lab var `var'2000 "`varlab' X 2000"
		lab values `var'2000 binary
	}

	** Generate weekly and hourly wage variables

	ren incwage inc
	lab var inc "Wages, `year'"

	* Division by zero or by a missing value yields missing
	gen wkinc = inc/wkswork1
	gen hrinc = inc/(wkswork1*uhrswork)

	lab var wkinc "Wage/wk, `year'"
	lab var hrinc "Wage/hr, `year'"

	** Generate log income variables

	gen Linc = log(inc) if inc > 0
	gen Lwkinc = log(wkinc) if wkinc > 0
	gen Lhrinc = log(hrinc) if hrinc > 0

	lab var Linc "Log wage, `year'"
	lab var Lwkinc "Log wage/wk, `year'"
	lab var Lhrinc "Log wage/hr, `year'"

	* Full file: every retained worker, whatever the metro status
	save ~/bulk/ipums/IPUMSfull`yr'`s', replace

	** Only want people living in metro areas for most purposes

	drop if metro == 0 | metro == 1

	compress
	save ~/bulk/ipums/IPUMSprepped`yr'`s', replace
}

* Stack the 2000, 1990 and 1980 metro files and add one 0/1 indicator per
* census year. The year variable is coded 0, 99 and 98 for those years.
use "~/bulk/ipums/IPUMSprepped00`s'", clear
foreach decade in 90 80 {
	append using "~/bulk/ipums/IPUMSprepped`decade'`s'"
}
local yearcodes 0 99 98
local yearnames 2000 1990 1980
forvalues i = 1/3 {
	local code : word `i' of `yearcodes'
	local name : word `i' of `yearnames'
	recode year (`code'=1 "Year `name'") (else=0 "Not `name'"), gen(yr`name')
	label variable yr`name' "Year `name'"
}
save "~/bulk/ipums/MSAplusMicro80_00`s'", replace

* Same again for 2000 and 1990 only
use "~/bulk/ipums/IPUMSprepped00`s'", clear
append using "~/bulk/ipums/IPUMSprepped90`s'"
local yearcodes 0 99
local yearnames 2000 1990
forvalues i = 1/2 {
	local code : word `i' of `yearcodes'
	local name : word `i' of `yearnames'
	recode year (`code'=1 "Year `name'") (else=0 "Other year"), gen(yr`name')
	label variable yr`name' "Year `name'"
}
save "~/bulk/ipums/MSAplusMicro90_00`s'", replace



