// Computes weighted means of family income, by year, for the cross-section data:
// one overall mean plus one mean per family-income percentile band.

// ---- Session setup ---------------------------------------------------------
// Wipe anything left in memory, then apply the session settings.
clear all
// NOTE(review): legacy memory request; newer Stata releases manage memory
// automatically -- confirm whether this line is still needed.
set mem 4g
capture log close
// NOTE(review): legacy numeric form of the -more- setting; presumably meant to
// stop output from pausing -- confirm against the Stata version in use.
set more 1
log using average_incs_cross_section.txt, text replace

// ---- Sample selection switch ----------------------------------------------
// ages == 1 : keep every observation regardless of age.
// ages == 0 : keep only observations with agely between 20 and 65 inclusive.
local ages = 1

use full_cross_section_occ_remap_6809_percentiles

if (`ages' == 0) {
    // inrange() is false for missing agely, same as the two-sided comparison.
    keep if inrange(agely, 20, 65)
}


// ---- Weighted mean family income by year -----------------------------------
// Creates mean_total (all observations) and mean_<lo>_<hi> for each percentile
// band, where a band holds observations with lo < percentiles_faminc <= hi.
// Each mean is sum(faminc*famwgt) / sum(famwgt) within year, and the result is
// repeated on every row of that year so the later collapse can pick it up.
//
// Differences from the earlier copy-pasted version of this section:
//   * Observations with missing faminc or famwgt are left out of BOTH the
//     numerator and the denominator. Previously a missing faminc still added
//     its famwgt to the denominator, pulling the mean toward zero.
//   * A year with no usable observations in a band now gets a missing mean
//     (0/0) instead of 0, which egen total() returns when every input is
//     missing.
//   * Intermediate sums are held in double rather than the default storage
//     type; the mean_* variables themselves keep the default type.
//
// A missing percentiles_faminc never falls in a band: Stata treats missing as
// larger than any number, so the "<= hi" test fails.
// NOTE(review): observations with percentiles_faminc == 0 (if any exist) are
// in no band because the lower bound is strict -- confirm this is intended.

tempvar usable wgt wgt_inc in_band sum_wgt sum_wgt_inc

// Rows that can contribute to a weighted mean.
gen byte `usable' = !missing(faminc, famwgt)
gen double `wgt' = famwgt if `usable'
gen double `wgt_inc' = faminc*famwgt if `usable'

di "Total average"
bysort year: egen double `sum_wgt' = total(`wgt')
bysort year: egen double `sum_wgt_inc' = total(`wgt_inc')
gen mean_total = `sum_wgt_inc'/`sum_wgt'
drop `sum_wgt' `sum_wgt_inc'

// Band list is in the same order the variables were originally created.
foreach band in 0_20 20_40 40_60 60_80 0_10 10_20 80_90 90_95 95_99 99_100 {
    // Split "lo_hi" into its bounds: `1' = lo, `2' = "_", `3' = hi.
    tokenize "`band'", parse("_")
    local lo `1'
    local hi `3'

    di "Percentiles `lo'-`hi'"

    gen byte `in_band' = `usable' & percentiles_faminc > `lo' & percentiles_faminc <= `hi'

    // Multiplying by the 0/1 indicator (rather than using -if-) keeps the
    // year-level sums defined on every row of the year, not just in-band rows.
    bysort year: egen double `sum_wgt' = total(`in_band'*`wgt')
    bysort year: egen double `sum_wgt_inc' = total(`in_band'*`wgt_inc')
    gen mean_`band' = `sum_wgt_inc'/`sum_wgt'

    drop `in_band' `sum_wgt' `sum_wgt_inc'
}

drop `usable' `wgt' `wgt_inc'

// ---- Write out one row per year --------------------------------------------
// Within each year, keep the first non-missing value of every mean_* variable
// and of year_earned.
collapse (firstnm) mean_* year_earned, by(year)

// Column layout for the saved file: identifiers first, then the overall mean,
// then the bands from the bottom of the distribution to the top.
local column_order year_earned year mean_total ///
    mean_0_10 mean_10_20 mean_0_20 ///
    mean_20_40 mean_40_60 mean_60_80 ///
    mean_80_90 mean_90_95 mean_95_99 mean_99_100
order `column_order'

save "ave_incs.dta", replace
log close
