*************************************************************************************
*   Replicate Figures 10 and 11                                                     *
*************************************************************************************


* Close any log that is still open (capture suppresses the error when no log
* is open), then start a new log for this do-file, overwriting an existing one.
* The global macro "folder" must be defined before this file is run.
capture log close
log using "$folder/Output/figure10_11.log", replace

*************************************************************************************
*   Restrict to individuals born 1946-1980                                          *
*************************************************************************************


* Load the census_acs extract and keep the analysis sample: birth years
* 1946-1980 observed at ages 20-59.  inrange() is false for missing values,
* so observations with a missing birth year or age are dropped as well.
use "$folder/census_acs", clear

keep if inrange(birthyr, 1946, 1980) & inrange(age, 20, 59)


*************************************************************************************
*   Clean variables and generate some new variables                                 *
*     - Convert monetary variables to real 2012 USD                                 *
*         - Use CPI in year previous to survey year                                 *
*         - Source: ftp://ftp.bls.gov/pub/special.requests/cpi/cpiai.txt            *
*************************************************************************************


* Treat the all-nines codes as missing before deflating.
replace ftotinc = . if ftotinc == 9999999
replace incwage = . if incwage ==  999999

* CPI applied to each survey year (per the section header: CPI of the year
* preceding the survey).  229.594 is the numerator that expresses amounts in
* 2012 dollars.
local cpi2000 166.6
local cpi2005 188.9
local cpi2006 195.3
local cpi2007 201.6
local cpi2008 207.342
local cpi2009 215.303
local cpi2010 214.537
local cpi2011 218.056

foreach v of varlist ftotinc incwage {
    * Deflate one survey year at a time
    foreach y of numlist 2000 2005/2011 {
        replace `v' = `v'*229.594/`cpi`y'' if year==`y'
    }
    * Companion variable in which missing income is set to zero
    gen     `v'_zero = `v'
    replace `v'_zero = 0 if `v'==.
}

label variable ftotinc       "Total family income (real 2012 USD)"
label variable incwage       "Wage and salary income (real 2012 USD)"
label variable ftotinc_zero  "Total family income (real 2012 USD) (missing values set to 0)"
label variable incwage_zero  "Wage and salary income (real 2012 USD) (missing values set to 0)"

* Indicator built as 2 - sex (equals 1 when sex==1 and 0 when sex==2)
generate male = 2 - sex
label variable male "Male"

* Years of completed education from the detailed education code (educd).
*
* Three educd codes (10, 21, 24) do not map to a single number of years.
* Each is assigned the sample mean of years among respondents who report the
* related detailed codes.  The means are stored both as scalars and as global
* macros (grp10, grp21, grp24); the globals are substituted further below.

* Mean for educd==10: codes 11 and 12 count as 0 years, codes 14-17 as 1-4
gen     yrs = 0          if inlist(educd, 11, 12)
replace yrs = educd - 13 if inlist(educd, 14, 15, 16, 17)
egen    avg = mean(yrs)
scalar  grp10 = avg[1]
global  grp10 = grp10
drop    yrs avg

* Mean for educd==21: codes 22 and 23 count as 5 and 6 years
gen     yrs = educd - 17 if inlist(educd, 22, 23)
egen    avg = mean(yrs)
scalar  grp21 = avg[1]
global  grp21 = grp21
drop    yrs avg

* Mean for educd==24: codes 25 and 26 count as 7 and 8 years
gen     yrs = educd - 18 if inlist(educd, 25, 26)
egen    avg = mean(yrs)
scalar  grp24 = avg[1]
global  grp24 = grp24
drop    yrs avg

* Map every educd code to years.  The conditions are mutually exclusive, so
* the order of the statements does not matter.  Codes not listed stay missing.
gen     educ_years = .
replace educ_years = 0          if inlist(educd, 0, 1, 2, 11, 12)
replace educ_years = $grp10     if educd==10
replace educ_years = 2.5        if educd==13
replace educ_years = educd - 13 if inlist(educd, 14, 15, 16, 17)
replace educ_years = 6.5        if educd==20
replace educ_years = $grp21     if educd==21
replace educ_years = educd - 17 if inlist(educd, 22, 23)
replace educ_years = $grp24     if educd==24
replace educ_years = educd - 18 if inlist(educd, 25, 26)
replace educ_years = 9          if educd==30
replace educ_years = 10         if educd==40
replace educ_years = 11         if educd==50
replace educ_years = 12         if inlist(educd, 60, 61, 62, 63, 64, 65)
replace educ_years = 13         if inlist(educd, 70, 71)
replace educ_years = 14         if inlist(educd, 80, 81, 82, 83)
replace educ_years = 15         if educd==90
replace educ_years = 16         if inlist(educd, 100, 101)
replace educ_years = 17         if educd==110
replace educ_years = 18         if inlist(educd, 111, 114, 115)
replace educ_years = 19         if educd==112
replace educ_years = 20         if inlist(educd, 113, 116)
label variable educ_years "Completed years of education"

* 2.5 and 6.5 are placeholder values for educd 13 and 20; the assert stops the
* run if either code is actually present in the data.
assert  educ_years!=2.5 & educ_years!=6.5

* Attainment indicators derived from the detailed education code (educd).
* Each equals 1 or 0 when educd is observed and is missing otherwise.
* The condition uses !missing() rather than a comparison with "." because
* extended missing values (.a-.z) are not equal to "." and compare greater
* than every number; they would otherwise be coded 0 or 1 instead of missing.
gen     educ_lesshs   = educd<=61  if !missing(educd)
lab var educ_lesshs "Education: Did not complete high school/GED"

gen     educ_hs       = educd>=62  if !missing(educd)
lab var educ_hs "Education: Completed at least high school/GED"

gen     educ_somecoll = educd>=65  if !missing(educd)
lab var educ_somecoll "Education: Attended at least some college"

gen     educ_bachelor = educd>=101 if !missing(educd)
lab var educ_bachelor "Education: Bachelor's degree or higher"

* Usual hours worked per week, copied from uhrswork.
gen     hours = uhrswork
lab var hours "Hours work per week (zero if did not work)"

* Weeks worked.  Through 2007 the continuous measure WKSWORK1 is used
* directly.  For 2008-2011 weeks are imputed with the linear prediction from
* a regression, estimated on year<=2007, of WKSWORK1 on the interval measure
* WKSWORK2 interacted with sex, plus age dummies, absorbing state effects.
* The absorb variable is spelled out in full (statefip) so the command does
* not depend on variable-name abbreviation being enabled.
* NOTE(review): WKSWORK1 and WKSWORK2 are upper-case while every other
*   variable in this file is lower-case; Stata names are case-sensitive, so
*   confirm the extract really carries these names.
* NOTE(review): predict is called without options; presumably this is the
*   linear prediction without the absorbed state effect - confirm against
*   the areg postestimation documentation.
xi:     areg WKSWORK1 i.WKSWORK2*i.male i.age if year<=2007, a(statefip)
predict avg2
sum     avg2 if year<=2007
sum     avg2 if year> 2007
gen     weeks = WKSWORK1 if year>=2000 & year<=2007
replace weeks = avg2     if year>=2008 & year<=2011
lab var weeks "Weeks worked in past year"

* Hours and weeks conditional on being positive.
gen     hoursempl = hours if hours!=0
lab var hoursempl "Hours work per week if >0"

* NOTE(review): for 2008-2011 weeks is a regression prediction, which is
*   unlikely to be exactly zero for non-workers, so weeks!=0 may not exclude
*   them in those years.  Behaviour is left unchanged here; confirm whether
*   this is intended before altering the definition.
gen     weeksempl = weeks if weeks!=0
lab var weeksempl "Weeks worked in past year if >0"


*************************************************************************************
*   Account for PUMA changes in Louisiana after Hurricane Katrina                   *
*     - https://usa.ipums.org/usa-action/variables/PUMA#comparability_tab: "due to  *
*       population displacement following Hurricane Katrina, three PUMA's (01801,   *
*       01802, and 01905) are combined into code 77777 for the 2006-onward ACS"     *
*     - I generate a flag "katrina" equal to one for a resident of any of these     *
*       PUMAs at any point in the IPUMS data                                        *
*     - For data in 2000 and 2005, I aggregate the pre-Katrina PUMAs together       *
*       so that the coding is consistently PUMA 77777                               *
*************************************************************************************


* Flag Louisiana (statefip 22) residents of the three pre-Katrina PUMAs or of
* the combined post-Katrina code, then give all of them the combined code.
generate katrina = statefip==22 & inlist(puma, 1801, 1802, 1905, 77777)
label variable katrina "Resident of PUMA whose boundaries were affected by Hurricane Katrina"

replace puma = 77777 if katrina==1


*************************************************************************************
*   Collapse data to generate one observation for each unique combination of        *
*     - Year of birth                                                               *
*     - Year of survey                                                              *
*     - PUMA                                                                        *
*   Do overall and for men alone                                                    *
*************************************************************************************


* Build a 7-character cell identifier: 2-digit state FIPS followed by the
* 5-digit PUMA code, both left-padded with zeros.  The padding is done by the
* display format passed to tostring instead of by prefixing "0" strings.
tostring statefip, generate(statefip_s) format(%02.0f)
tostring puma,     generate(puma_s)     format(%05.0f)
generate id = statefip_s + puma_s
label variable id "Unique ID (statefip+puma)"


* Men-only cell means, saved to a temporary file for the merge further below.
* The data are collapsed by sex to perwt-weighted means (plus the unweighted
* sum of perwt in each cell), reshaped wide on male, and the columns for
* male==0 are dropped, leaving one row per birthyr-year-id cell.
preserve
local outcomes incwage hours weeks hoursempl weeksempl
keep     `outcomes' perwt birthyr year id male
collapse (mean) `outcomes' (rawsum) perwt [aw=perwt], by(birthyr year id male)
reshape wide `outcomes' perwt, i(birthyr year id) j(male)
* After the reshape the only variables ending in 0 are the male==0 columns
drop    *0
foreach v in `outcomes' perwt {
    rename `v'1 `v'_men
}
tempfile men
save    "`men'"
restore


* Cell means for the full sample (perwt-weighted), one row per
* birthyr-year-id cell, then attach the men-only means saved earlier.
* statefip, puma and katrina are carried through the collapse as means.
local cellvars statefip puma age katrina educ_hs educ_somecoll educ_bachelor ftotinc
keep     `cellvars' perwt birthyr year id
collapse (mean) `cellvars' [aw=perwt], by(birthyr year id)
merge    1:1 birthyr year id using "`men'", nogenerate


*************************************************************************************
*   Flag residents of Chicago, Los Angeles, and New York City PMSAs                 *
*************************************************************************************


* Build a list of unique statefip-puma pairs that the crosswalk assigns to
* PMSA codes 1600, 4480 or 5600, and flag those PUMAs in the cell data.
preserve
use     "$folder/crosswalk", clear
keep if inlist(pmsa, 1600, 4480, 5600)
keep    statefip puma
* Keep one row per statefip-puma pair
bysort  statefip puma: keep if _n==1
gen     chicago_la_ny = 1
tempfile chicago_la_ny
save    "`chicago_la_ny'"
restore

* Crosswalk-only PUMAs are discarded; unmatched cells get a flag of zero
merge   m:1 statefip puma using "`chicago_la_ny'", keep(master match) nogenerate
replace chicago_la_ny = 0 if chicago_la_ny==.


*************************************************************************************
*   Add PUMA population in 2000 from PUMA-county crosswalk                          *
*     - Observations from Louisiana (FIPS code 22) PUMA 77777 appear in the IPUMS   *
*       data but not the crosswalk.  These are a result of PUMA renumbering after   *
*       Katrina.  I sum the populations of the original three pre-Katrina PUMAs.    *
*     - Observations from Puerto Rico (FIPS code 72) appear in the crosswalk but    *
*       are not in the IPUMS data                                                   *
*************************************************************************************


* PUMA population in 2000 from the crosswalk rows with level==780.
* The three pre-Katrina Louisiana PUMAs are collapsed into one row coded
* 77777 whose pop2000 is the sum over the three.
preserve
use     "$folder/crosswalk", clear
keep if level==780
local katpumas "statefip==22 & inlist(puma, 1801, 1802, 1905)"
egen    popnew  = total(pop2000) if `katpumas'
replace pop2000 = popnew         if `katpumas'
* Keep a single row (the former 1905) and recode it to the combined PUMA
drop if statefip==22 & inlist(puma, 1801, 1802)
replace puma = 77777 if statefip==22 & puma==1905
keep    statefip puma pop2000
tempfile pumapop
save    "`pumapop'"
restore

* Attach the population; print unmatched keys from either side to the log
* before discarding the crosswalk-only rows.
merge   m:1 statefip puma using "`pumapop'"
list    statefip puma if _merge==1, sep(0)
list    statefip puma if _merge==2, sep(0)
drop if _merge==2
drop    _merge


*************************************************************************************
*   Merge clinic information onto main IPUMS data                                   *
*   First, merge PUMAs onto clinic data                                             *
*     - Inflate county codes by 10 to correspond to IPUMS data                      *
*     - Miami-Dade county in Florida has county FIPS                                *
*         - 250 in IPUMS data                                                       *
*             - See: https://usa.ipums.org/usa/volii/ICPSR.shtml                    *
*         - 250 in the clinic file                                                  *
*             - See: http://mcdc.missouri.edu/websas/geocorr90_htmls/counties.html  *
*         - 860 in the crosswalk                                                    *
*             - See: http://www.itl.nist.gov/fipspubs/co-codes/fl.txt               *
*       I therefore recode the crosswalk FIPS to be 250                             *
*     - There are no observations in the clinic file that cannot be matched         *
*       to the crosswalk. There are several observations in the crosswalk that      *
*       cannot be matched to the clinic file but, double-checking the crosswalk     *
*       with https://usa.ipums.org/usa/volii/ICPSR.shtml, I am unable to figure     *
*       out the source of these omissions.  Regardless, though, all of the          *
*       observations in the clinic file are matched to the crosswalk file.          *
*************************************************************************************


* Build one row per statefip-puma holding the county and date of every
* clinic matched to that PUMA, ordered from earliest to latest date.
preserve

* Step 1: clinic file, keyed by state and county.  County codes are
* multiplied by 10 to match the coding used in the crosswalk and IPUMS.
use     "$folder/vs_fo_final_extract", clear
drop    fips
keep if fp_year_p74_fed!=.
rename  stfips statefip
rename  cofips county
replace county = 10*county
tempfile clinics
save    "`clinics'"

* Step 2: county-PUMA rows of the crosswalk (level==781), with the Florida
* county code 860 recoded to 250, matched to the clinic file.  Only matched
* rows are kept.
use     "$folder/crosswalk", clear
keep if level==781
keep    statefip county puma
replace county = 250 if statefip==12 & county==860
merge   m:1 statefip county using "`clinics'", keep(match) nogenerate

* Step 3: number the clinics within each PUMA by date and reshape wide
bysort  statefip puma (fp_year_p74_fed): gen order = _n
reshape wide county fp_year_p74_fed, i(statefip puma) j(order)
order   statefip puma county* fp_year_p74_fed*
tempfile clinics_by_puma
save    "`clinics_by_puma'"
restore

* Attach the clinic information to the cell data; list, then drop, PUMAs that
* appear only in the clinic file.
merge   m:1 statefip puma using "`clinics_by_puma'"
list    statefip county* puma if _merge==2
drop if _merge==2
drop    _merge

* The combined Katrina PUMA gets its first clinic assigned by hand
* (county 710, date 67).
replace county1          = 710 if katrina==1
replace fp_year_p74_fed1 = 67  if katrina==1
* Shorten the clinic date variable names (fp_year_p74_fed# becomes fp_date#),
* then label the eight county and date slots.
forvalues k = 1/8 {
    rename fp_year_p74_fed`k' fp_date`k'
}

label variable county1 "County of earliest clinic recorded in PUMA"
label variable county2 "County of  second-earliest clinic recorded in PUMA (if >1 clinic  in PUMA)"
label variable county3 "County of   third-earliest clinic recorded in PUMA (if >2 clinics in PUMA)"
label variable county4 "County of  fourth-earliest clinic recorded in PUMA (if >3 clinics in PUMA)"
label variable county5 "County of   fifth-earliest clinic recorded in PUMA (if >4 clinics in PUMA)"
label variable county6 "County of   sixth-earliest clinic recorded in PUMA (if >5 clinics in PUMA)"
label variable county7 "County of seventh-earliest clinic recorded in PUMA (if >6 clinics in PUMA)"
label variable county8 "County of  eighth-earliest clinic recorded in PUMA (if >7 clinics in PUMA)"

label variable fp_date1 "Date of earliest clinic recorded in PUMA"
label variable fp_date2 "Date of  second-earliest clinic recorded in PUMA (if >1 clinic  in PUMA)"
label variable fp_date3 "Date of   third-earliest clinic recorded in PUMA (if >2 clinics in PUMA)"
label variable fp_date4 "Date of  fourth-earliest clinic recorded in PUMA (if >3 clinics in PUMA)"
label variable fp_date5 "Date of   fifth-earliest clinic recorded in PUMA (if >4 clinics in PUMA)"
label variable fp_date6 "Date of   sixth-earliest clinic recorded in PUMA (if >5 clinics in PUMA)"
label variable fp_date7 "Date of seventh-earliest clinic recorded in PUMA (if >6 clinics in PUMA)"
label variable fp_date8 "Date of  eighth-earliest clinic recorded in PUMA (if >7 clinics in PUMA)"


*************************************************************************************
*   Figures 10 and 11                                                               *
*************************************************************************************


* Estimation sample and regressions behind Figures 10 and 11.

gen     age2 = age*age

* Exclude cells in PUMAs flagged as Chicago, Los Angeles or New York City
drop if chicago_la_ny==1

* Regressor list; the wildcards are resolved when areg runs, after the xi
* calls below have created the dummy variables.
local X "_Ibirthyr* _Ist* _Texp* age age2"

* Keep only PUMAs with a recorded first clinic date
drop if fp_date1==.

* Clinic dates are stored as two-digit years.  exp is the birth year relative
* to the first clinic date.  Because rows with a missing fp_date1 were just
* dropped, no further missing-date handling is needed.
replace fp_date1 = 1900 + fp_date1
gen     exp = birthyr - fp_date1
assert  exp>=-33 & exp<=15

* Group exp into bins with the listed left endpoints and omit the bin that
* starts at -4 as the reference category.
egen    exp_g = cut(exp), at(-33,-19,-14,-9,-4,1,6,11,16)
char    exp_g[omit] -4
* The exp_g dummies get their own prefix so that the second xi call, which
* uses the default prefix, does not replace them.
xi,     prefix(_T) i.exp_g
xi      i.birthyr i.statefip*i.birthyr

* One regression per outcome with id fixed effects and standard errors
* clustered on id.  The first outcome replaces the output file and the rest
* are appended.  rmean is the outcome mean in the reference bin within the
* estimation sample; r(p) is the p-value of the joint test on the two listed
* exp_g dummies (NOTE(review): presumably the bins starting at 1 and 6 -
* confirm against the variables xi creates).
local r replace
foreach y in ftotinc incwage_men hours_men hoursempl_men weeks_men weeksempl_men educ_hs educ_somecoll educ_bachelor {
    areg `y' `X', cluster(id) a(id)
    sum `y' if e(sample) & exp_g==-4
    local rmean = r(mean)
    testparm _Texp_g_6 _Texp_g_7
    outreg2 _Texp* using "$folder/Output/figure10_11.xls", `r' br title(`e(depvar)') ctitle(`e(cmdline)') addstat("Mean DV t=-4", `rmean', "P 58,62", `r(p)') bdec(6)
    local r append
}


log close