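//This calculates weighted means of family income (and shares of total family income) for 13 different occupation categories, plus a residual "other" category, in the cross-section data. It is done once for all families and once for the top income percentile (percentiles_faminc==100). This is done only for 1983 up to, but not including, 2003 (the code keeps year>=1983 & year<2003) for consistency of the variable.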

// We use family weights to calculate the means.
//Written by NLZ 6/29/10. Rewritten by NLZ 7/5/10 to make simpler to export results to Excel. Modified, proofed JP 12/10

// ---- Session setup --------------------------------------------------------
// Wipe whatever is in memory before starting.
clear all
// Request 4g of memory.
// NOTE(review): newer Stata releases manage memory automatically -- confirm
// whether this line is still needed for the Stata version in use.
set mem 4g
// Close a log left open by an earlier run; -capture- suppresses the error
// when no log is open.
capture log close
// NOTE(review): presumably turns off the --more-- pause so the script runs
// unattended -- confirm.
set more 1
// Write a plain-text log, overwriting any previous copy.
log using average_incs_cross_section_occ.txt, text replace
// Age-restriction switch used by the load sections below:
//   ages == 1  -> keep all ages
//   ages == 0  -> keep only 20 <= agely <= 65
local ages 1

// ---- Load the sample: all families -----------------------------------------
use full_cross_section_occ_remap_6809_percentiles

// Optional age restriction; only active when the `ages' switch is 0.
if (`ages' == 0) {
    keep if inrange(agely, 20, 65)
}

// Attach the variables stored in occmly_data, matching one-to-one on
// recnum and year. Rows that exist only in occmly_data (_merge == 2) are
// discarded; unmatched rows from the data in memory are kept.
merge 1:1 recnum year using occmly_data
keep if _merge != 2
drop _merge

// Sample window: 1983 <= year < 2003 (rows with missing year are dropped too).
drop if year < 1983 | year >= 2003

//bysort year: tab occupation_NLZ [fweight=famwgt]

// ---------------------------------------------------------------------------
// Weighted family-income statistics by year and occupation group, computed on
// whatever sample is currently in memory.
//
// Groups: occmly codes 1..13 individually, plus "other" = codes 0, 14 and 15.
// For every group g the following variables are created (constant within year):
//   total_weight_g          sum of famwgt over families in g
//   mean_occupation_g       famwgt-weighted mean of faminc within g
//   total_inc_occupation_g  total_weight_g * mean_occupation_g
//   frac_inc_occupation_g   total_inc_occupation_g / total_income
// The data are then collapsed to one row per year and saved.
//
// All accumulators are stored as double. -generate- and -egen- default to
// float (about 7 significant digits), which rounds the weighted totals.
//
// Behaviour deliberately left unchanged:
//   * total() treats missing as 0, so a family with missing faminc still
//     counts in total_weight_g but adds nothing to the income sum.
//   * a group with zero weight in a year gets mean_occupation_g == 0
//     (0/0 is missing, and the sum of missings is 0).
// ---------------------------------------------------------------------------

// Yearly weighted total of family income over all families in memory.
bysort year: egen double total_income = total(famwgt*faminc)

// Scratch variables get temporary names so they cannot collide with
// variables already in the dataset.
tempvar member share

foreach occ in 1 2 3 4 5 6 7 8 9 10 11 12 13 other {
    // 0/1 indicator: does this family belong to the current group?
    if "`occ'" == "other" {
        gen byte `member' = inlist(occmly, 0, 14, 15)
    }
    else {
        gen byte `member' = occmly == `occ'
    }

    // Total family weight of the group within each year.
    bysort year: egen double total_weight_`occ' = total(`member'*famwgt)

    // Each family's contribution to the group's weighted mean income;
    // summing the contributions within year gives the mean.
    gen double `share' = `member'*faminc*famwgt/total_weight_`occ'
    bysort year: egen double mean_occupation_`occ' = total(`share')

    // Group income total and its share of all family income in the year.
    gen double total_inc_occupation_`occ' = total_weight_`occ'*mean_occupation_`occ'
    gen double frac_inc_occupation_`occ' = total_inc_occupation_`occ'/total_income

    drop `member' `share'
}

//output the results
// Every kept variable is constant within year, so the first non-missing
// value per year is the yearly statistic.
collapse (firstnm) mean_* frac_inc_* total_weight_*, by(year)

save ave_incs_occupation, replace


// ---- Load the sample: top 1% ----------------------------------------------
// Reload the full cross-section, replacing the data currently in memory.
use full_cross_section_occ_remap_6809_percentiles, clear

// Optional age restriction; only active when the `ages' switch is 0.
if (`ages' == 0) {
    keep if inrange(agely, 20, 65)
}

// Attach the variables stored in occmly_data, matching one-to-one on
// recnum and year. Rows that exist only in occmly_data (_merge == 2) are
// discarded; unmatched rows from the data in memory are kept.
merge 1:1 recnum year using occmly_data
keep if _merge != 2
drop _merge

// Keep only the top family-income percentile (rows with a missing
// percentile are dropped as well).
drop if percentiles_faminc != 100

// Sample window: 1983 <= year < 2003. The original author notes this should
// be redundant at this point.
drop if year < 1983 | year >= 2003

//bysort year: tab occupation_NLZ [fweight=famwgt]

// ---------------------------------------------------------------------------
// Weighted family-income statistics by year and occupation group, computed on
// whatever sample is currently in memory. At this point in the file that is
// the percentiles_faminc==100 (top 1%) sample, so "total_income" and every
// share below are relative to that sample only.
//
// Groups: occmly codes 1..13 individually, plus "other" = codes 0, 14 and 15.
// For every group g the following variables are created (constant within year):
//   total_weight_g          sum of famwgt over families in g
//   mean_occupation_g       famwgt-weighted mean of faminc within g
//   total_inc_occupation_g  total_weight_g * mean_occupation_g
//   frac_inc_occupation_g   total_inc_occupation_g / total_income
// The data are then collapsed to one row per year and saved.
//
// All accumulators are stored as double. -generate- and -egen- default to
// float (about 7 significant digits), which rounds the weighted totals.
//
// Behaviour deliberately left unchanged:
//   * total() treats missing as 0, so a family with missing faminc still
//     counts in total_weight_g but adds nothing to the income sum.
//   * a group with zero weight in a year gets mean_occupation_g == 0
//     (0/0 is missing, and the sum of missings is 0).
// ---------------------------------------------------------------------------

// Yearly weighted total of family income over all families in memory.
bysort year: egen double total_income = total(famwgt*faminc)

// Scratch variables get temporary names so they cannot collide with
// variables already in the dataset.
tempvar member share

foreach occ in 1 2 3 4 5 6 7 8 9 10 11 12 13 other {
    // 0/1 indicator: does this family belong to the current group?
    if "`occ'" == "other" {
        gen byte `member' = inlist(occmly, 0, 14, 15)
    }
    else {
        gen byte `member' = occmly == `occ'
    }

    // Total family weight of the group within each year.
    bysort year: egen double total_weight_`occ' = total(`member'*famwgt)

    // Each family's contribution to the group's weighted mean income;
    // summing the contributions within year gives the mean.
    gen double `share' = `member'*faminc*famwgt/total_weight_`occ'
    bysort year: egen double mean_occupation_`occ' = total(`share')

    // Group income total and its share of all family income in the year.
    gen double total_inc_occupation_`occ' = total_weight_`occ'*mean_occupation_`occ'
    gen double frac_inc_occupation_`occ' = total_inc_occupation_`occ'/total_income

    drop `member' `share'
}

//output the results
// Every kept variable is constant within year, so the first non-missing
// value per year is the yearly statistic.
collapse (firstnm) mean_* frac_inc_* total_weight_*, by(year)

save ave_incs_occupation_top, replace
log close
