* msaindustries.do
* Determine distribution of industries across MSAs
* Joshua D. Gottlieb (jdgottl@fas.harvard.edu)

* Directory layout. Each path ends in a slash so file names can be appended directly.
local ipums "~/urban/ipums/"
local bulk "~/bulk/ipums/"
local ind "~/urban/industries/"

* All output datasets below are saved relative to the industries directory.
cd `ind'

* Open a fresh named log whose file name carries the run date and time.
* NOTE(review): now is not defined in this file; it is presumably a user-written
* command that returns r(date) and r(time) -- confirm it is on the adopath.
capture log close msaind
now
local date=r(date)
local time=r(time)
log using ~/urban/logs/msaind_`date'_`time'.log, text name(msaind)

clear
set mem 3g

* Suffix appended to every input and output file name. Empty selects the
* unsuffixed files; the commented alternative selects the _001 files.
local t ""
* local t "_001"

* For each census year, build two datasets from the prepped IPUMS extract:
*   AllIndustryEmp<yr>  -- one row per industry (ind1990)
*   MSAIndustryEmp<yr>  -- one row per MSA-industry cell (msa x ind1990)
* Each dataset is built in two passes over the microdata (employment, education
* and income first, then sex and age bands) and the passes are merged.
* NOTE(review): the two passes are presumably needed because the contents()
* option of table accepts a limited number of statistics -- confirm.
foreach yr in 70 80 90 00 {
	* Four-digit year used in labels. This must be one if / else if ladder:
	* without the else on the 80 branch, 1970 fell through to the final else
	* and every 1970 label read 2000.
	if "`yr'" == "70" local year = "1970"
	else if "`yr'" == "80" local year = "1980"
	else if "`yr'" == "90" local year = "1990"
	else local year = "2000"

	* ---------------- Industry level, pass 1: employment, education, income
	use `bulk'IPUMSprepped`yr'`t', clear
	drop if ind1990 == 0
	* i is a constant so that sum i is weighted employment and rawsum i is the raw count
	gen i = 1
	qui table ind1990 [fw=perwt], c(sum i rawsum i sum hsdiploma sum college sum inc)  replace name(stat)

	ren stat1 IndEmp`yr'
	ren stat2 IndUnwEmp`yr'
	ren stat3 IndHSGrads`yr'
	ren stat4 IndBAGrads`yr'
	ren stat5 IndIncTot`yr'

	lab var ind1990 "Industry code (1990 Census industry)"

	lab var IndEmp`yr' "Number of employees in industry, `year'"
	lab var IndUnwEmp`yr' "Number of employees in industry ignoring census weight, `year'"
	lab var IndHSGrads`yr' "High school graduates in industry, `year'"
	lab var IndBAGrads`yr' "College graduates in industry, `year'"
	lab var IndIncTot`yr' "Total income in industry, `year'"

	* Shares and means are relative to weighted industry employment
	gen IndHSPct`yr' = IndHSGrads`yr' / IndEmp`yr'
	gen IndBAPct`yr' = IndBAGrads`yr' / IndEmp`yr'
	gen IndIncPerCap`yr' = IndIncTot`yr' / IndEmp`yr'
	gen lIndIncPerCap`yr' = log(IndIncPerCap`yr')

	lab var IndHSPct`yr' "Percent high school graduates in industry, `year'"
	lab var IndBAPct`yr' "Percent college graduates in industry, `year'"
	lab var IndIncPerCap`yr' "Mean income in industry, `year'"
	lab var lIndIncPerCap`yr' "Log mean income in industry, `year'"

	lab data "Industry employment data (IPUMS), 1990 census industry 2-digit codes, `year'"
	save AllIndustryEmp`yr'`t', replace

	* ---------------- Industry level, pass 2: sex and age bands
	use `bulk'IPUMSprepped`yr'`t', clear
	drop if ind1990 == 0
	qui table ind1990 [fw=perwt], c(sum male sum age30s sum age40s sum age50s sum age60s)  replace name(stat)

	ren stat1 IndMale`yr'
	ren stat2 Ind30s`yr'
	ren stat3 Ind40s`yr'
	ren stat4 Ind50s`yr'
	ren stat5 Ind60s`yr'

	lab var ind1990 "Industry code (1990 Census industry)"
	lab var IndMale`yr' "Number of employees who are male, `year'"

	tempfile newchars
	save `newchars'

	* Attach pass 2 to pass 1 and convert the counts to shares
	use AllIndustryEmp`yr'`t', clear
	merge ind1990 using `newchars'
	tab _merge
	drop _merge

	gen IndMalePct`yr' = IndMale`yr' / IndEmp`yr'
	lab var IndMalePct`yr' "Percent of industry's employees who are male, `year'"

	forvalues y=3/6 {
		gen Ind`y'0sPct`yr' = Ind`y'0s`yr' / IndEmp`yr'

		lab var Ind`y'0s`yr' "Number of employees in `y'0s, `year'"
		lab var Ind`y'0sPct`yr' "Percent of employees in `y'0s, `year'"
	}
	save, replace

	* ---------------- MSA-industry level, pass 1: employment, education, income
	use `bulk'IPUMSprepped`yr'`t', clear
	drop if ind1990 == 0
	gen i = 1
	qui table msa ind1990 [fw=perwt], c(sum i rawsum i sum hsdiploma sum college sum inc)  replace name(stat)

	ren stat1 MSAIndEmp`yr'
	ren stat2 MSAIndUnwEmp`yr'
	ren stat3 MSAIndHSGrads`yr'
	ren stat4 MSAIndBAGrads`yr'
	ren stat5 MSAIndIncTot`yr'

	lab var msa "MSA"
	lab var ind1990 "Industry code (1990 Census industry)"

	lab var MSAIndEmp`yr' "Number of employees in MSA-industry, `year'"
	lab var MSAIndUnwEmp`yr' "Number of employees in MSA-industry ignoring census weight, `year'"
	lab var MSAIndHSGrads`yr' "High school graduates in MSA-industry, `year'"
	lab var MSAIndBAGrads`yr' "College graduates in MSA-industry, `year'"
	lab var MSAIndIncTot`yr' "Total income in MSA-industry, `year'"

	gen MSAIndHSPct`yr' = MSAIndHSGrads`yr' / MSAIndEmp`yr'
	gen MSAIndBAPct`yr' = MSAIndBAGrads`yr' / MSAIndEmp`yr'
	gen MSAIndIncPerCap`yr' = MSAIndIncTot`yr' / MSAIndEmp`yr'
	gen lMSAIndIncPerCap`yr' = log(MSAIndIncPerCap`yr')

	lab var MSAIndHSPct`yr' "Percent high school graduates within MSA-industry, `year'"
	lab var MSAIndBAPct`yr' "Percent college graduates within MSA-industry, `year'"
	lab var MSAIndIncPerCap`yr' "Mean income within MSA-industry, `year'"
	lab var lMSAIndIncPerCap`yr' "Log mean income within MSA-industry, `year'"

	lab data "MSA-industry employment data (IPUMS), `year'"
	save MSAIndustryEmp`yr'`t', replace

	* ---------------- MSA-industry level, pass 2: sex and age bands
	use `bulk'IPUMSprepped`yr'`t', clear
	drop if ind1990 == 0
	qui table msa ind1990 [fw=perwt], c(sum male sum age30s sum age40s sum age50s sum age60s)  replace name(stat)

	ren stat1 MSAIndMale`yr'
	ren stat2 MSAInd30s`yr'
	ren stat3 MSAInd40s`yr'
	ren stat4 MSAInd50s`yr'
	ren stat5 MSAInd60s`yr'

	lab var msa "MSA"
	lab var ind1990 "Industry code (1990 Census industry)"
	lab var MSAIndMale`yr' "Number of employees who are male, `year'"

	tempfile newchars
	save `newchars'

	* Attach pass 2 to pass 1 and convert the counts to shares
	use MSAIndustryEmp`yr'`t', clear
	merge msa ind1990 using `newchars'
	tab _merge
	drop _merge

	gen MSAIndMalePct`yr' = MSAIndMale`yr' / MSAIndEmp`yr'
	lab var MSAIndMalePct`yr' "Percent of industry's employees who are male, `year'"

	forvalues y=3/6 {
		gen MSAInd`y'0sPct`yr' = MSAInd`y'0s`yr' / MSAIndEmp`yr'

		lab var MSAInd`y'0s`yr' "Number of employees in `y'0s, `year'"
		lab var MSAInd`y'0sPct`yr' "Percent of employees in `y'0s, `year'"
	}
	save, replace
}

* Stack the four per-year industry files side by side, keyed on ind1990.
* The 1970 file is the master; the other three years are merged in one call,
* and the merge indicators are tabulated for the log and then discarded.
use AllIndustryEmp70`t'
merge ind1990 using ///
	AllIndustryEmp80`t' ///
	AllIndustryEmp90`t' ///
	AllIndustryEmp00`t', ///
	sort _merge(_ind)
tab1 _ind*
drop _ind*

* NOTE(review): groupindustries.do is not shown here; the code below relies on
* a variable named sector, which is presumably created by that do-file -- confirm.
do groupindustries

* Rank industries by college-graduate share (IndBAPct) in each year, both
* across all industries and within each sector, and flag top/bottom quartiles.
foreach yr in 70 80 90 00 {
	* Four-digit year for labels: 00 is 2000, the rest are 19xx
	local year "19`yr'"
	if "`yr'" == "00" local year "2000"

	* Quartile across all industries, then top and bottom indicators
	xtile indHCquart`yr' = IndBAPct`yr', n(4)
	label variable indHCquart`yr' "Industry's quartile of the HC dist, `year'"

	recode indHCquart`yr' (1/3=0) (4=1), gen(indHCtop`yr')
	label variable indHCtop`yr' "Industry in top quartile of the HC dist, `year'"

	recode indHCquart`yr' (1=1) (2/4=0), gen(indHCbot`yr')
	label variable indHCbot`yr' "Industry in bottom quartile of the HC dist, `year'"

	* Quartile within each sector: compute it sector by sector into a
	* temporary variable and copy the non-missing results across
	gen indHCsector_quart`yr' = .
	forvalues sec = 1(2)9 {
		tempvar secq
		xtile `secq' = IndBAPct`yr' if sector == `sec', n(4)
		replace indHCsector_quart`yr' = `secq' if !mi(`secq')
	}
	label variable indHCsector_quart`yr' "Industry's quartile of its sector's HC dist, `year'"

	recode indHCsector_quart`yr' (1/3=0) (4=1), gen(indHCsector_top`yr')
	label variable indHCsector_top`yr' "Ind in top quartile of its sector's HC dist, `year'"

	recode indHCsector_quart`yr' (1=1) (2/4=0), gen(indHCsector_bot`yr')
	label variable indHCsector_bot`yr' "Ind in bottom quartile of its sector's HC dist, `year'"

	* Remove the temporary per-sector quartile variables (names start with __)
	drop __*
}

* Save the combined industry-level file. It is built from the 1970, 1980, 1990
* and 2000 files, so the data label must cover 1970-2000 (it previously said 1980-2000).
label data "Industry-level employment data (from IPUMS), by 1990 Census industry, 1970-2000"
save AllIndustryEmp`t', replace

* Stack the four per-year MSA-industry files side by side, keyed on msa and ind1990.
use MSAIndustryEmp70`t'
merge msa ind1990 using ///
	MSAIndustryEmp80`t' ///
	MSAIndustryEmp90`t' ///
	MSAIndustryEmp00`t', ///
	sort _merge(_msa_ind)
tab1 _msa_ind*
drop _msa_ind*

* Attach the industry-level characteristics to every MSA-industry row.
* uniqusing: the industry file has one row per ind1990, the master has many.
merge ind1990 using AllIndustryEmp`t', ///
	sort _merge(_inddata) uniqusing
tab _inddata
drop _inddata

* For each year, compute MSA and MSA-sector employment totals, each industry's
* share of those totals, the MSA employment share in high and low human-capital
* industries, and Herfindahl concentration indices (sums of squared shares).
* Every measure is built twice: census-weighted and unweighted (Unw).
foreach yr in 70 80 90 00 {
	* Four-digit year for labels: 00 is 2000, the rest are 19xx
	local year "19`yr'"
	if "`yr'" == "00" local year "2000"

	* ---- Employment totals by MSA and by MSA-sector
	bysort msa: egen MSAEmp`yr' = total(MSAIndEmp`yr')
	label variable MSAEmp`yr' "Total MSA employees in known ind, `year'"

	bysort msa: egen MSAUnwEmp`yr' = total(MSAIndUnwEmp`yr')
	label variable MSAUnwEmp`yr' "Total MSA employees in known ind ignoring census weight, `year'"

	bysort msa sector: egen MSASectorEmp`yr' = total(MSAIndEmp`yr')
	label variable MSASectorEmp`yr' "Total MSA employees in sector, `year'"

	bysort msa sector: egen MSASectorUnwEmp`yr' = total(MSAIndUnwEmp`yr')
	label variable MSASectorUnwEmp`yr' "Total MSA employees in sector ignoring census weight, `year'"

	* ---- Industry share of MSA employment, and its square
	gen MSAIndPct`yr' = MSAIndEmp`yr' / MSAEmp`yr'
	label variable MSAIndPct`yr' "Percent of MSA emp in ind, `year'"

	gen MSAIndUnwPct`yr' = MSAIndUnwEmp`yr' / MSAUnwEmp`yr'
	label variable MSAIndUnwPct`yr' "Percent of MSA emp in ind ignoring census weight, `year'"

	gen MSAIndPctSQ`yr' = MSAIndPct`yr' * MSAIndPct`yr'
	label variable MSAIndPctSQ`yr' "Percent of MSA emp in ind squared, `year'"

	gen MSAIndUnwPctSQ`yr' = MSAIndUnwPct`yr' * MSAIndUnwPct`yr'
	label variable MSAIndUnwPctSQ`yr' "Percent of MSA emp in ind ignoring census weight squared, `year'"

	* ---- Industry share of MSA-sector employment, and its square
	gen MSAIndSecPct`yr' = MSAIndEmp`yr' / MSASectorEmp`yr'
	label variable MSAIndSecPct`yr' "Percent of MSA-sector emp in ind, `year'"

	gen MSAIndSecUnwPct`yr' = MSAIndUnwEmp`yr' / MSASectorUnwEmp`yr'
	label variable MSAIndSecUnwPct`yr' "Percent of MSA-sector emp in ind ignoring census weight, `year'"

	gen MSAIndSecPctSQ`yr' = MSAIndSecPct`yr' * MSAIndSecPct`yr'
	label variable MSAIndSecPctSQ`yr' "Percent of MSA-sector emp in ind squared, `year'"

	gen MSAIndSecUnwPctSQ`yr' = MSAIndSecUnwPct`yr' * MSAIndSecUnwPct`yr'
	label variable MSAIndSecUnwPctSQ`yr' "Percent of MSA-sector emp in ind ignoring census weight squared, `year'"

	* ---- Share of MSA employment in top / bottom human-capital industries.
	* Each indicator is 0/1, so the total adds up the shares of flagged industries.
	bysort msa: egen MSApctEmpHighHC`yr' = total(indHCtop`yr' * MSAIndPct`yr')
	label variable MSApctEmpHighHC`yr' "Percent of MSA's emp in top quartile of HC industries"

	bysort msa: egen MSApctEmpLowHC`yr' = total(indHCbot`yr' * MSAIndPct`yr')
	label variable MSApctEmpLowHC`yr' "Percent of MSA's emp in bottom quartile of HC industries"

	bysort msa: egen MSApctEmpHighHCsec`yr' = total(indHCsector_top`yr' * MSAIndPct`yr')
	label variable MSApctEmpHighHCsec`yr' "Percent of MSA's emp in inds in top quart. of their sector's HC dist"

	bysort msa: egen MSApctEmpLowHCsec`yr' = total(indHCsector_bot`yr' * MSAIndPct`yr')
	label variable MSApctEmpLowHCsec`yr' "Percent of MSA's emp in inds in bottom quart. of their sector's HC dist"

	* ---- Herfindahl indices: sum of squared shares within MSA and within MSA-sector
	bysort msa: egen MSAHerf`yr' = total(MSAIndPctSQ`yr')
	label variable MSAHerf`yr' "Herfindahl index for MSA"

	bysort msa: egen MSAUnwHerf`yr' = total(MSAIndUnwPctSQ`yr')
	label variable MSAUnwHerf`yr' "Herfindahl index for MSA ignoring census weight"

	bysort msa sector: egen MSASecHerf`yr' = total(MSAIndSecPctSQ`yr')
	label variable MSASecHerf`yr' "Herfindahl index for MSA-sector"

	bysort msa sector: egen MSASecUnwHerf`yr' = total(MSAIndSecUnwPctSQ`yr')
	label variable MSASecUnwHerf`yr' "Herfindahl index for MSA-sector ignoring census weight"
}

* Save the full MSA-industry file, then reduce it to one row per MSA with the
* sector-specific measures spread into one column per sector.
label data "MSA/industry-level employment data with industry characteristics"
save MSAIndustryEmp`t', replace

* msav: measures kept as one column per year (not split by sector)
* secv: measures that are split into one column per sector by the reshape
* The ?? wildcard matches the two-digit year suffix.
* NOTE(review): MSApctEmpHighHCsec and MSApctEmpLowHCsec are listed in secv,
* but earlier in this file they are totalled by msa only, so their per-sector
* copies will be identical -- confirm this is intended.
unab msav: MSAEmp?? MSAUnwEmp?? ///
	MSApctEmpHighHC?? MSApctEmpLowHC?? ///
	MSAHerf?? MSAUnwHerf??
unab secv: MSASectorEmp?? MSASectorUnwEmp?? ///
	MSApctEmpHighHCsec?? MSApctEmpLowHCsec?? ///
	MSASecHerf?? MSASecUnwHerf??

* Remember each sector-level label; reshape wide creates new variables and
* the labels are re-applied with the sector name appended afterwards.
foreach v of varlist `secv' {
	local l`v' : variable label `v'
}

* One row per MSA-sector, then one row per MSA
keep msa sector `msav' `secv'
bysort msa sector: keep if _n == 1
reshape i msa
reshape j sector
reshape xij `secv'
reshape xi `msav'
reshape
reshape wide

* Sector codes 1 3 5 7 9 map, in order, to these five names
local secnames "Extraction Manufacturing Sales Services Public"
foreach v in `secv' {
	forvalues s = 1(2)9 {
		local secd : word `=(`s' + 1) / 2' of `secnames'
		lab var `v'`s' "`l`v'' (`secd')"
	}
}

order msa `msav'
sort msa
save MSAIndustryData`t', replace

/*
use MSAIndustryEmp`t', clear
sort ind1990 msa

merge ind1990 using `ipums'FEind70 `ipums'FEind80 `ipums'FEind90 `ipums'FEind00, _merge(_indfe)

tab1 _indfe*

foreach wageyr in 70 80 90 00 {
	foreach empyr in 70 80 90 00 {
		foreach g in all pm {
			if "`wageyr'" == "70" local wageyear = "1970"
			if "`wageyr'" == "80" local wageyear = "1980"
			if "`wageyr'" == "90" local wageyear = "1990"
			if "`wageyr'" == "00" local wageyear = "2000"
		
			if "`empyr'" == "70" local empyear = "1970"
			if "`empyr'" == "80" local empyear = "1980"
			if "`empyr'" == "90" local empyear = "1990"
			if "`empyr'" == "00" local empyear = "2000"

			if "`g'" == "pm" local group = "prime-age males"
			if "`g'" == "all" local group = "all workers"

			gen wageFE`g'`wageyr'Xemp`empyr' = FElogcontrolled_`g'`wageyr' * MSAIndEmp`empyr' / MSAEmp`empyr'
			bysort msa: gen predwage`wageyr'`g'`empyr' = sum(wageFE`g'`wageyr'Xemp`empyr')

			lab var wageFE`g'`wageyr'Xemp`empyr' "Ind. wage FE from `wageyear' from `group' X MSA ind. emp. in `empyear'"
			lab var predwage`wageyr'`g'`empyr' "MSA average wage FE in `wageyear' from `group' given `empyear' industry dist."
		}
	}
}

keep msa predwage* MSApctEmp* MSAHerf* MSAUnwHerf* MSAEmp* MSAUnwEmp*
bysort msa: keep if _n == 1

save MSApredictedwages`t', replace

*/

