* msaFEs.do
* Create MSA wage residuals, MSA fixed effects & industry fixed effects
* Edward L. Glaeser (eglaeser@harvard.edu) and Joshua D. Gottlieb (jdgottl@fas.harvard.edu)

* Open a fresh log named wageregs, closing any log of that name left open
capture log close wageregs

* now is not a built-in command; the two lines after it read r(date) and
* r(time) from its results to build a per-run log file name
now
local date = r(date)
local time = r(time)
log using ~/urban/logs/wageregs_`date'_`time'.log, text name(wageregs)

* Start with no data in memory and no paging of output
clear
capture set more off

* Memory and matrix-size settings
set memory 6g
set matsize 400

* Time units looped over by the fixed-effect sections further down
local times "yr"
* local times "yr wk hr"

* Suffix of the metareadlbl do-file run when labelling msa
local def "99pmsa"

* ----------------------------------------------------------------------
* Part 1: individual wage regressions and MSA-average residuals
*
* For each census year (80, 90, 00):
*   1. load the prepped IPUMS extract
*   2. regress six income measures on individual characteristics, once on
*      the full sample and once on the pm == 1 subsample, exporting each
*      table with outreg and storing the residuals
*   3. average the residuals within msa, weighting by perwt
*   4. keep one row per msa and save it as residuals<yr>.dta
* ----------------------------------------------------------------------
foreach yr in 80 90 00 {

	if `yr' == 80 local year = "1980"
	else if `yr' == 90 local year = "1990"
	else local year = "2000"

	use ~/bulk/ipums/IPUMSprepped`yr', clear

	** Right-hand-side variables for the full sample and the pm subsample
	local rhs_all "male age30s age40s age50s age60s agegt70 hsdiploma college"
	local rhs_pm "age30s age40s age50s hsdiploma college"

	************************************************************************
	** Run wage regressions with only individual characteristics
	************************************************************************

	foreach s in all pm {

		** all = full sample, pm = restricted to prime-age males
		local cond ""
		if "`s'" == "pm" local cond "if pm == 1"

		** The first table of each sample replaces the csv, later ones append
		local mode "replace"

		** e_ residuals come from level regressions, log_ residuals from the
		** L-prefixed (log) dependent variables
		foreach pre in e log {
			foreach t in yr wk hr {

				** Dependent variable: inc, wkinc, hrinc, Linc, Lwkinc or Lhrinc
				if "`t'" == "yr" local dep "inc"
				else local dep "`t'inc"
				if "`pre'" == "log" local dep "L`dep'"

				prun reg `dep' `rhs_`s'' `cond' [fweight = perwt], robust
				outreg using `year'wageregs_`s'.csv, se bracket `mode'
				predict `pre'_`s'_`t', residual

				local mode "append"
			}
		}

		** Distribution of the full-sample level residuals, for the log
		if "`s'" == "all" sum e_all_yr e_all_wk e_all_hr [fweight = perwt], det
	}

	**
	** Average residuals across metro areas
	**

	sort msa

	foreach s in all pm {

		local cond ""
		if "`s'" == "pm" local cond "if pm == 1"

		** Denominator: total perwt in the msa (pm rows only for the pm sample)
		** NOTE(review): egen sum() counts a missing residual as zero while this
		** denominator still includes that row - confirm the prepped data have
		** no missing residuals, otherwise the averages are pulled toward zero
		by msa: egen sum_weights = sum(perwt) `cond'

		foreach pre in e log {

			** r_ = mean level residual, l_ = mean log residual
			if "`pre'" == "e" local out "r"
			else local out "l"

			foreach t in yr wk hr {
				by msa: egen totresid = sum((`pre'_`s'_`t' * perwt)) `cond'
				gen `out'_`s'_`t' = totresid/sum_weights `cond'
				drop totresid
			}
		}

		drop sum_weights
	}

	** The individual residuals are not needed once the msa means exist
	drop e_all_yr e_all_wk e_all_hr log_all_yr log_all_wk log_all_hr
	drop e_pm_yr e_pm_wk e_pm_hr log_pm_yr log_pm_wk log_pm_hr

	************************************************************************
	** Collapse into a set with the average residual per MSA (so each observation is a MSA)
	************************************************************************

	sort msa

	** Filter on r_pm_yr, not on the e_pm residuals, which are dropped above
	** and can no longer be referenced. r_pm_yr is only filled in on pm == 1
	** rows, so the row kept for each msa carries both the all and pm means;
	** an msa with no pm == 1 rows drops out of the output.
	drop if mi(r_pm_yr)

	** Flag the first row of every msa and keep only those rows
	gen first = 0
	replace first = 1 if msa != msa[_n-1] & msa[_n-1] != .
	replace first = 1 if msa[_n-1] == .
	keep if first == 1
	keep statefip msa r_*_* l_*_* metarea

	** Label each average and append the two-digit year to its name
	foreach v in r l {
		if "`v'" == "r" local _v = "wage resid"
		if "`v'" == "l" local _v = "log wage resid"

		foreach s in all pm {
			if "`s'" == "all" local _s = ""
			if "`s'" == "pm" local _s = " for prime males"

			foreach t in yr wk hr {
				if "`t'" == "yr" local _t = "annual"
				if "`t'" == "wk" local _t = "weekly"
				if "`t'" == "hr" local _t = "hourly"

				lab var `v'_`s'_`t' "Avg `_t' `_v'`_s', `year'"
				ren `v'_`s'_`t' `v'_`s'_`t'`yr'
			}
		}
	}

	sort msa

	save "~/urban/ipums/residuals`yr'.dta", replace
}

* ----------------------------------------------------------------------
* Part 2: MSA fixed effects from wage regressions
*
* For each census year, eight regressions with msa dummies are run:
* (inc, Linc) x (all, pm) x (uncontrolled, controlled). The msa
* coefficients of each are stacked as one row of matrix J, reshaped so
* that each observation is an msa, demeaned, labelled and saved as FE<yr>.
* ----------------------------------------------------------------------
foreach yr in 80 90 00 {

	if `yr' == 80 local year = "1980"
	else if `yr' == 90 local year = "1990"
	else local year = "2000"

	use ~/bulk/ipums/IPUMSprepped`yr', clear
	drop if msa == 0

	** Individual controls for the full sample and for the pm subsample
	local rhs_all "male age30s age40s age50s age60s agegt70 hsdiploma college"
	local rhs_pm "age30s age40s age50s hsdiploma college"

	** NOTE(review): t is never referenced inside this loop, and drop _all
	** below empties the data, so a second pass would have nothing to regress
	** on. With times holding a single value the body runs exactly once.
	foreach t in `times' {

		** NOTE(review): xi documents the omit characteristic as a variable-level
		** number or as the dataset-level word prevalent - confirm that this
		** dataset-level numeric form has the intended effect
		char _dta[omit] 9360

		** Row order of J (and so fe1-fe8 after the reshape):
		**   inc:  uncontrolled all, controlled all, uncontrolled pm, controlled pm
		**   Linc: the same four
		local k = 0
		foreach dep in inc Linc {
			foreach g in all pm {

				** Every pm regression is restricted to pm == 1. Deriving the
				** condition from the group name keeps the controlled pm
				** regressions on the same sample as the uncontrolled ones.
				local cond ""
				if "`g'" == "pm" local cond "if pm == 1"

				foreach c in uncontrolled controlled {

					local rhs ""
					if "`c'" == "controlled" local rhs "`rhs_`g''"

					prun xi: reg `dep' `rhs' i.msa `cond' [fweight = perwt], r nocons
					matrix G = e(b)

					** Keep the coefficients from the _Imsa_40 column onward
					matrix I = G["y1", "_Imsa_40"...]

					local k = `k' + 1
					if `k' == 1 matrix J = I
					else matrix J = J \ I
				}
			}
		}

		drop _all

		** One observation per regression, one variable per msa dummy
		svmat J, n(col)

		gen regtype = _n
		lab var regtype "Regression type"

		** Long form: one observation per (regtype, msa)
		reshape i regtype
		reshape j msa
		reshape xij _Imsa_@
		reshape 
		reshape long

		ren _Imsa_ fe

		** Wide form: one observation per msa with fe1-fe8
		reshape clear
		reshape i msa
		reshape j regtype
		reshape xij fe
		reshape
		reshape wide

		lab var msa "MSA Code"

		do metareadlbl`def'
		lab values msa metareadlbl

		ren fe1 FEuncontrolled_all`yr'
		ren fe2 FEcontrolled_all`yr'
		ren fe3 FEuncontrolled_pm`yr'
		ren fe4 FEcontrolled_pm`yr'
		ren fe5 FEloguncontrolled_all`yr'
		ren fe6 FElogcontrolled_all`yr'
		ren fe7 FEloguncontrolled_pm`yr'
		ren fe8 FElogcontrolled_pm`yr'

		** Demean each fixed-effect variable across msas and label it
		foreach l in FE FElog {

			if ("`l'" == "FE") local _l "MSA wage regression fixed effect, `year',"
			if ("`l'" == "FElog") local _l "MSA log wage regression fixed effect, `year',"

			foreach c in uncontrolled controlled {

				if ("`c'" == "uncontrolled") local _c "without individual controls"
				if ("`c'" == "controlled") local _c "after individual controls"

				foreach g in all pm {

					if ("`g'" == "all") local _g ""
					if ("`g'" == "pm") local _g "prime-age males, "

					sum `l'`c'_`g'`yr', meanonly
					local m=r(mean)
					replace `l'`c'_`g'`yr' = `l'`c'_`g'`yr' - `m'

					lab var `l'`c'_`g'`yr' "`_l' `_g'`_c' (demeaned)"
				}
			}
		}

		sort msa

		save FE`yr', replace

	}
}



* ----------------------------------------------------------------------
* Part 3: industry fixed effects from wage regressions
*
* Same design as the MSA fixed effects, with ind1990 dummies in place of
* msa dummies and observations with ind1990 == 0 excluded from every
* regression. Output is one row per ind1990 code, saved as FEind<yr>.
* ----------------------------------------------------------------------
foreach yr in 80 90 00 {

	if `yr' == 80 local year = "1980"
	else if `yr' == 90 local year = "1990"
	else local year = "2000"

	use ~/bulk/ipums/IPUMSprepped`yr', clear
	drop if msa == 0

	** Individual controls for the full sample and for the pm subsample
	local rhs_all "male age30s age40s age50s age60s agegt70 hsdiploma college"
	local rhs_pm "age30s age40s age50s hsdiploma college"

	** NOTE(review): t is never referenced inside this loop, and drop _all
	** below empties the data, so a second pass would have nothing to regress
	** on. With times holding a single value the body runs exactly once.
	foreach t in `times' {

		** NOTE(review): xi documents the omit characteristic as a variable-level
		** number or as the dataset-level word prevalent - confirm that this
		** dataset-level numeric form has the intended effect
		char _dta[omit] 10

		** Row order of J (and so fe1-fe8 after the reshape):
		**   inc:  uncontrolled all, controlled all, uncontrolled pm, controlled pm
		**   Linc: the same four
		local k = 0
		foreach dep in inc Linc {
			foreach g in all pm {

				** Every pm regression is restricted to pm == 1. Deriving the
				** condition from the group name keeps the controlled pm
				** regressions on the same sample as the uncontrolled ones.
				local cond "if ind1990 != 0"
				if "`g'" == "pm" local cond "if ind1990 != 0 & pm == 1"

				foreach c in uncontrolled controlled {

					local rhs ""
					if "`c'" == "controlled" local rhs "`rhs_`g''"

					prun xi: reg `dep' `rhs' i.ind1990 `cond' [fweight = perwt], r nocons
					matrix G = e(b)

					** Keep the coefficients from the _Iind1990_10 column onward
					matrix I = G["y1", "_Iind1990_10"...]

					local k = `k' + 1
					if `k' == 1 matrix J = I
					else matrix J = J \ I
				}
			}
		}

		drop _all

		** One observation per regression, one variable per industry dummy
		svmat J, n(col)

		gen regtype = _n
		lab var regtype "Regression type"

		** Long form: one observation per (regtype, ind1990)
		reshape i regtype
		reshape j ind1990
		reshape xij _Iind1990_@
		reshape 
		reshape long

		ren _Iind1990_ fe

		** Wide form: one observation per ind1990 with fe1-fe8
		reshape clear
		reshape i ind1990
		reshape j regtype
		reshape xij fe
		reshape
		reshape wide

		lab var ind1990 "Industry code, 1990 basis"

*		do metareadlbl`def'
*		lab values msa metareadlbl
		lab values ind1990 ind1990lbl

		ren fe1 FEuncontrolled_all`yr'
		ren fe2 FEcontrolled_all`yr'
		ren fe3 FEuncontrolled_pm`yr'
		ren fe4 FEcontrolled_pm`yr'
		ren fe5 FEloguncontrolled_all`yr'
		ren fe6 FElogcontrolled_all`yr'
		ren fe7 FEloguncontrolled_pm`yr'
		ren fe8 FElogcontrolled_pm`yr'

		** Demean each fixed-effect variable across industries and label it
		foreach l in FE FElog {

			if ("`l'" == "FE") local _l "Industry wage regression fixed effect, `year',"
			if ("`l'" == "FElog") local _l "Industry log wage regression fixed effect, `year',"

			foreach c in uncontrolled controlled {

				if ("`c'" == "uncontrolled") local _c "without individual controls"
				if ("`c'" == "controlled") local _c "after individual controls"

				foreach g in all pm {

					if ("`g'" == "all") local _g ""
					if ("`g'" == "pm") local _g "prime-age males, "

					sum `l'`c'_`g'`yr', meanonly
					local m=r(mean)
					replace `l'`c'_`g'`yr' = `l'`c'_`g'`yr' - `m'

					lab var `l'`c'_`g'`yr' "`_l' `_g'`_c' (demeaned)"
				}
			}
		}

		sort ind1990

		label data "Industry fixed effects in wage regressions, `year'"
		save FEind`yr', replace

	}
}

* Combine the per-year industry fixed-effect files into one dataset keyed
* on ind1990: start from 1980, then match-merge 1990 and 2000 in turn,
* each with its own merge indicator (_feind90, _feind00)
use FEind80, clear
foreach yr in 90 00 {
	merge ind1990 using FEind`yr', _merge(_feind`yr') sort
}
sort ind1990
label data "Industry fixed effects in wage regressions, 1980-2000"
save FEind, replace

