// Calculates weighted mean family income, by year, for each race group and each education group in the cross-section data.
// Creates the Cross-Section Race & Education sheet.

// Session setup: reset Stata, open a fresh text log, load the data, and apply
// the optional sample restrictions.
clear all
set mem 4g
capture log close
set more 1
log using average_incs_cross_section_race.txt, text replace

// Age-restriction switch: 1 keeps every age, 0 restricts the sample to ages 20-65.
local ages = 1

use full_cross_section_occ_remap_6809_percentiles

if `ages' == 0 {
    // inrange() is false for missing agely, matching the explicit two-sided comparison
    keep if inrange(agely, 20, 65)
}

// To cut the sample to the top 1%, uncomment the next line.
//keep if percentiles_faminc==100

// Weighted mean family income by race, computed within each value of year.
// For every race group the loop creates two variables, constant within year:
//   total_weight_<group> : sum of famwgt over all observations in the group
//   mean_<group>         : famwgt-weighted mean of faminc over the group
// The mean uses only observations where both faminc and famwgt are non-missing,
// in the numerator AND the denominator, so missing incomes no longer drag the
// mean toward zero. A group with no usable observations in a year gets a
// missing mean (0/0) rather than 0. Everything is stored as double to avoid
// float rounding of large weight totals.
local race_names white black other
local race_labels `""White" "Black" "Not Black or white""'

// race codes 1, 2, 3 correspond position-by-position to the names/labels above
forvalues code = 1/3 {
    local grp : word `code' of `race_names'
    local lbl : word `code' of `race_labels'
    di "`lbl'"

    tempvar in_group has_income income_weight income_sum

    // membership indicator (0 when race is missing)
    gen byte `in_group' = race == `code'
    // members that can actually contribute to the weighted mean
    gen byte `has_income' = `in_group' & !missing(faminc, famwgt)

    // total() ignores missing terms, so a missing famwgt contributes 0
    bysort year: egen double total_weight_`grp' = total(`in_group' * famwgt)
    by year: egen double `income_weight' = total(`has_income' * famwgt)
    by year: egen double `income_sum' = total(`has_income' * faminc * famwgt)

    // division by a zero weight yields missing
    gen double mean_`grp' = `income_sum' / `income_weight'

    drop `in_group' `has_income' `income_weight' `income_sum'
}

// Weighted mean family income by education group, computed within each value of year.
// Education cutoffs are based on Autor cleaning code that recodes the education
// variables for consistency over time.
// For every education group the loop creates two variables, constant within year:
//   total_weight_<group> : sum of famwgt over all observations in the group
//   mean_<group>         : famwgt-weighted mean of faminc over the group
// The mean uses only observations where both faminc and famwgt are non-missing,
// in the numerator AND the denominator, so missing incomes no longer drag the
// mean toward zero. A group with no usable observations in a year gets a
// missing mean (0/0) rather than 0. Everything is stored as double to avoid
// float rounding of large weight totals.
local school_names less_12 12 13_15 16 17
local school_labels `""Less than high school" "High school, no college" "Some college" "College graduate" "Post college""'

// school codes 1..5 correspond position-by-position to the names/labels above
forvalues code = 1/5 {
    local grp : word `code' of `school_names'
    local lbl : word `code' of `school_labels'
    di "`lbl'"

    tempvar in_group has_income income_weight income_sum

    // membership indicator (0 when school is missing)
    gen byte `in_group' = school == `code'
    // members that can actually contribute to the weighted mean
    gen byte `has_income' = `in_group' & !missing(faminc, famwgt)

    // total() ignores missing terms, so a missing famwgt contributes 0
    bysort year: egen double total_weight_`grp' = total(`in_group' * famwgt)
    by year: egen double `income_weight' = total(`has_income' * famwgt)
    by year: egen double `income_sum' = total(`has_income' * faminc * famwgt)

    // division by a zero weight yields missing
    gen double mean_`grp' = `income_sum' / `income_weight'

    drop `in_group' `has_income' `income_weight' `income_sum'
}

// Write out the results: reduce to one row per year, arrange the columns
// (all means first, then all weight totals), save, and close the log.
local groups white black other less_12 12 13_15 16 17
local mean_cols
local weight_cols
foreach g of local groups {
    local mean_cols `mean_cols' mean_`g'
    local weight_cols `weight_cols' total_weight_`g'
}

// the group statistics are constant within year, so the first non-missing value is kept
collapse (firstnm) mean_* total_weight_* year_earned, by(year)

order year_earned year `mean_cols' `weight_cols'
save ave_incs_race_ed, replace
log close
