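//Calculates a variety of demographic summary statistics, by year, for all families and for the top 1% of families.
//Saves sum_stats, sum_stats_age_cut, sum_stats_top and sum_stats_age_cut_top (original note: creates file CPS High Earn 1d)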

// Start from an empty session before loading any data
clear all
// Memory request. NOTE(review): set mem is ignored by Stata 12 and later - confirm the target version
set mem 4g
// Close a log left open by an earlier run; capture suppresses the error when no log is open
capture log close
// NOTE(review): legacy numeric form of set more; presumably turns off the --more-- pauses - confirm
set more 1
// Write a plain-text log (t abbreviates text), overwriting any existing file of the same name
log using top_inc_summary_stats.txt, t replace

//full population summary stats
//j = 0: all ages, saved as sum_stats
//j = 1: ages 20-65 only, saved as sum_stats_age_cut
forvalues j = 0/1 {

    //clear makes the load safe even if unsaved data is still in memory
    use full_cross_section_occ_remap_6809_percentiles, clear
    drop if year < 1977

    //0/1 indicators, averaged with family weights further down
    gen retired = (esr == 7)
    //Stata treats missing as larger than any number, so a missing child count
    //must be excluded explicitly or it would be flagged as having kids
    gen with_kids = (_child18 > 0 & !missing(_child18))

    //married indicator; which marstat codes count as married depends on the survey year
    //The OR alternatives are parenthesized because & binds tighter than | in Stata;
    //without the parentheses the year and married==. conditions applied only to the
    //second alternative, so marstat 5 was flagged as married in every year.
    //NOTE(review): the grouping is inferred from the year conditions - confirm against the CPS codebook.
    //The year < 1976 rule cannot match after the drop above; it is kept for completeness.
    gen married = 1 if marstat <= 2
    replace married = 1 if marstat == 3 & year >= 1988 & married == .
    replace married = 1 if (marstat == 3 | marstat == 4) & year < 1988 & year >= 1976 & married == .
    replace married = 1 if (marstat == 5 | marstat == 4) & year < 1976 & married == .
    replace married = 0 if married == .

    //second pass only: restrict to ages 20 through 65
    if (`j' == 1) {
        keep if agely >= 20 & agely <= 65
    }

    //weighted number of families in each industry group, by year
    //(running sum within year, then the year total copied to every row)
    forvalues i = 1/8 {
        gen temp_indic = industry_recode == `i'
        bysort year: gen total_weight_ind_`i' = sum(temp_indic*famwgt)
        bysort year: replace total_weight_ind_`i' = total_weight_ind_`i'[_N]
        drop temp_indic
    }

    //weighted number of families in each race group, by year
    foreach r in black white other {
        gen temp_indic = `r' == 1
        bysort year: gen total_weight_race_`r' = sum(temp_indic*famwgt)
        bysort year: replace total_weight_race_`r' = total_weight_race_`r'[_N]
        drop temp_indic
    }

    //weighted number of families in each schooling group, by year
    forvalues i = 1/5 {
        gen temp_indic = school == `i'
        bysort year: gen total_weight_school_`i' = sum(temp_indic*famwgt)
        bysort year: replace total_weight_school_`i' = total_weight_school_`i'[_N]
        drop temp_indic
    }

    //family-weighted means by year
    //egen sum() counts missing products as zero, while the denominator is the
    //total weight of all families in the year
    bysort year: egen total_weight = sum(famwgt)
    foreach v of varlist with_kids married retired selfemp _child18 agely {
        bysort year: egen mean_`v' = sum(famwgt*`v')
        replace mean_`v' = mean_`v'/total_weight
    }

    //output the results: one row per year
    //the pattern total_weight_* keeps the group totals but not total_weight itself
    collapse (firstnm) mean_* total_weight_*, by(year)
    if (`j' == 1) {
        save sum_stats_age_cut, replace
    }
    else {
        save sum_stats, replace
    }
}

//top 1% summary stats (families with percentiles_faminc equal to 100)
//k = 0: all ages, saved as sum_stats_top
//k = 1: ages 20-65 only, saved as sum_stats_age_cut_top
forvalues k = 0/1 {

    //clear makes the load safe even if unsaved data is still in memory
    use full_cross_section_occ_remap_6809_percentiles, clear
    drop if year < 1977

    * TOP 1%
    keep if percentiles_faminc == 100

    //0/1 indicators, averaged with family weights further down
    gen retired = (esr == 7)
    //Stata treats missing as larger than any number, so a missing child count
    //must be excluded explicitly or it would be flagged as having kids
    gen with_kids = (_child18 > 0 & !missing(_child18))

    //married indicator; which marstat codes count as married depends on the survey year
    //The OR alternatives are parenthesized because & binds tighter than | in Stata;
    //without the parentheses the year and married==. conditions applied only to the
    //second alternative, so marstat 5 was flagged as married in every year.
    //NOTE(review): the grouping is inferred from the year conditions - confirm against the CPS codebook.
    //The year < 1976 rule cannot match after the drop above; it is kept for completeness.
    gen married = 1 if marstat <= 2
    replace married = 1 if marstat == 3 & year >= 1988 & married == .
    replace married = 1 if (marstat == 3 | marstat == 4) & year < 1988 & year >= 1976 & married == .
    replace married = 1 if (marstat == 5 | marstat == 4) & year < 1976 & married == .
    replace married = 0 if married == .

    //second pass only: restrict to ages 20 through 65
    if (`k' == 1) {
        keep if agely >= 20 & agely <= 65
    }

    //weighted number of families in each industry group, by year
    //(running sum within year, then the year total copied to every row)
    forvalues i = 1/8 {
        gen temp_indic = industry_recode == `i'
        bysort year: gen total_weight_ind_`i' = sum(temp_indic*famwgt)
        bysort year: replace total_weight_ind_`i' = total_weight_ind_`i'[_N]
        drop temp_indic
    }

    //weighted number of families in each race group, by year
    foreach r in black white other {
        gen temp_indic = `r' == 1
        bysort year: gen total_weight_race_`r' = sum(temp_indic*famwgt)
        bysort year: replace total_weight_race_`r' = total_weight_race_`r'[_N]
        drop temp_indic
    }

    //weighted number of families in each schooling group, by year
    forvalues i = 1/5 {
        gen temp_indic = school == `i'
        bysort year: gen total_weight_school_`i' = sum(temp_indic*famwgt)
        bysort year: replace total_weight_school_`i' = total_weight_school_`i'[_N]
        drop temp_indic
    }

    //family-weighted means by year
    //egen sum() counts missing products as zero, while the denominator is the
    //total weight of all families in the year
    bysort year: egen total_weight = sum(famwgt)
    foreach v of varlist with_kids married retired selfemp _child18 agely {
        bysort year: egen mean_`v' = sum(famwgt*`v')
        replace mean_`v' = mean_`v'/total_weight
    }

    //output the results: one row per year
    //the pattern total_weight_* keeps the group totals but not total_weight itself
    collapse (firstnm) mean_* total_weight_*, by(year)
    if (`k' == 1) {
        save sum_stats_age_cut_top, replace
    }
    else {
        save sum_stats_top, replace
    }
}


// Close the log file top_inc_summary_stats.txt
log close
