/*This do file creates the main data to replicate main tables in Hoynes
and Schanzenbach (2018)*/

* Session setup: start from an empty workspace, switch off output paging
* (permanently), and close any log that is still open.
clear all
set more off, permanently
capture log close

* Working directory and log file. The cd is wrapped in capture, so a failed
* directory change does not stop the script here.
capture cd "./replication/"
log using "./log/create_data.log", replace

* Fix the random-number seed (needed to replicate exactly)
set seed 39281

****************************************************************************
*Bring in and save aux data
****************************************************************************
* CPI series: read the spreadsheet, keep year and the index (cpiurs), and
* save it as ./dta/cpiu.dta for the deflation steps further down.
import excel using "./raw/cpiu.xlsx", firstrow clear
rename Taxyear year
destring year, replace
rename CPIURS cpiurs
keep year cpiurs
save "./dta/cpiu.dta", replace

* State crosswalk: FIPS code (stfips), postal abbreviation (stabb) and
* upper-cased state name (state).
import excel using "./raw/xwalk_fipst.xlsx", firstrow clear
rename Postal stabb
rename FIPS stfips
* Rows whose FIPS cell contains the text "Code" are notes, not states
drop if regexm(stfips, "Code")
destring stfips, replace
rename _all, lower
replace state = strtrim(upper(state))
* This tempfile name is declared here but not written to in this section
tempfile xwalk

save "./dta/xwalk_fipst.dta", replace

* Child population, part 1: intercensal estimates 1990-1999.
* Output: tempfile ic9099 with one row per year and pop = summed
* population of ages 0 through 18.
import delimited "./raw/pop/us-est90int-09.csv", clear rowrange(3) varn(3)
describe *
* Confirm that v3 is the total column (it must equal v4 + v5)
assert v3 == v4+v5
keep apr allage v3

* The date column becomes year: keep only rows whose date text contains
* "uly" (the July estimates) and take the last four characters as the year
rename apr year
keep if regexm(year, "uly")
replace year = substr(year, -4,4)
destring year, replace
tabulate year, missing

* Age column: blank out entries containing "Age", strip "+" signs, then
* keep ages up to 18 (rows with missing age are dropped by the same filter)
rename all age
replace age = "" if regexm(age, "Age")
destring age, replace ignore("+")
keep if age <= 18
tabulate age, missing

rename v3 pop
tabulate age, missing
tabulate year, missing

* One row per year
collapse (sum) pop, by(year)

tempfile ic9099
save `ic9099', replace
		
* Child population, part 2: intercensal estimates 2000-2010.
* Output: tempfile ic0010 with one row per year (2010 itself is excluded
* because a more recent file supplies it).
import delimited "./raw/pop/us-est00int-alldata.csv", clear
tabulate age, missing
keep if age <= 18

* July estimates only
tabulate month year
keep if month == 7

rename tot_pop pop
keep year age pop
keep if year < 2010

* One row per year
collapse (sum) pop, by(year)

tempfile ic0010
save `ic0010', replace
	
* Child population, part 3: the nc-est2016 file, then stack all three
* series and save ./dta/pop_le18.dta (year, pop).
import delimited "./raw/pop/nc-est2016-agesex-res.csv", clear
tabulate age sex
* sex == 0 rows are the totals
keep if sex == 0
keep age popest*

* The popestimate columns carry the year as a suffix; go long on year
reshape long popestimate, i(age) j(year)
tabulate age, missing
keep if age <= 18
rename popestimate pop

* One row per year
collapse (sum) pop, by(year)

* Stack the two earlier series underneath
append using `ic0010'
append using `ic9099'

* Eyeball the combined series
tabulate year, summarize(pop)
sort year

save "./dta/pop_le18.dta", replace

* CPS extract: share of kids in each income-to-poverty bin.
* Changed 1/15/2018:
*   1) Numerator = Bitler-Hoynes ATTI measure
*   2) Denominator = Anchored SPM
*   Income:Poverty ratio variable = inc_alt1a_spm
use "./dta/cps_BPEA.dta", clear

* Children only (age 18 and under)
keep if p_age <= 18

* Use calyear as the year variable so it lines up with the other files
drop year
rename calyear year

* Counter that is summed (weighted) in the collapses below
generate one = 1

* --- Kids by the worker indicator -------------------------------------
* Done inside preserve/restore so the person-level CPS data are available
* again afterwards.
preserve
collapse (sum) one [pw = p_marwt], by(worker year)
drop if worker == .
egen onetotal = total(one), by(year)
generate pctworker = one/onetotal
keep *worker year
reshape wide pctworker, i(year) j(worker)

* Eyeball the series
tabulate year, summarize(pctworker1)

* Scale the shares by the population totals (matched years only)
merge 1:1 year using "./dta/pop_le18.dta", keep(match)
forvalues w = 0/1 {
	generate numkids_worker`w' = pop*pctworker`w'
}
save "./dta/pctworker.dta", replace
restore

* Income-to-poverty bins:
*   1: below 0.5    2: 0.5 to under 1    3: 1 to under 1.5
*   4: 1.5 to under 2    5: 2 and above (ratio not equal to .)
rename h_inc_alt1a_spm spm_inc_pov
generate spmbin = .
replace spmbin = 1 if spm_inc_pov < 0.5
replace spmbin = 2 if spm_inc_pov >= 0.5 & spm_inc_pov < 1
replace spmbin = 3 if spm_inc_pov >= 1 & spm_inc_pov < 1.5
replace spmbin = 4 if spm_inc_pov >= 1.5 & spm_inc_pov < 2
replace spmbin = 5 if spm_inc_pov >= 2 & spm_inc_pov !=.

tabulate year spmbin, row missing

* Weighted count of kids per bin and year, converted to within-year shares
collapse (sum) one [pw = p_marwt], by(spmbin year)
drop if spmbin == .
egen onetotal = total(one), by(year)
generate pctspmbin = one/onetotal
keep *spmbin year
reshape wide pctspmbin, i(year) j(spmbin)

* Eyeball each share
forvalues b = 1/5 {
	tabulate year, summarize(pctspmbin`b')
}

* Variable labels, one per bin
local ranges `""<50%" "50-100%" "100-150%" "150-200%" "200%+""'
forvalues b = 1/5 {
	local r : word `b' of `ranges'
	label variable pctspmbin`b' "Pct kids `r' spm pov in March CPS"
}

describe *

* Back to long (one row per year and bin) for easy merging; the share
* variable ends up named pctspm
reshape long pctspmbin, i(year) j(bin)
rename pct*bin pct*

save "./dta/pctkidbin.dta", replace


*******
*SNAP rules to impute food stamps for TANF/EITC/CTC recipients
*******
* SNAP maximum benefit by year, AK/HI flag and family size.
* Output: tempfile max (year, ak, hi, famsize, snapmax), years 1990 on.
import excel "./raw/QC/Maximum-Benefits.xlsx", firstrow clear sheet("Data_max")
rename FSBenefitfor* snapmax*
rename *personfamily *
drop if year == .

* Sizes 5 through 20: the 4-person amount plus the per-additional-person
* amount for every member beyond the fourth
forvalues size = 5/20 {
	generate snapmax`size' = snapmax4 + (eachaddlperson*(`size'-4))
}
drop eachaddlperson

* One row per year, ak, hi and family size
reshape long snapmax, i(year ak hi) j(famsize)
keep if year >= 1990

tempfile max
save `max', replace
		
* SNAP standard deduction (disregard) by year, AK/HI flag and family size.
* Output: tempfile disregard.
import excel "./raw/QC/Maximum-Benefits.xlsx", firstrow clear ///
	sheet("Data_disregard") cellrange(A2)
drop G
rename pp* disregard*
rename disregardl* disregard*

* Flags for Alaska and Hawaii taken from the state text
tabulate state, missing
generate ak = regexm(state, "Alaska")
generate hi = regexm(state, "Hawaii")
drop state

reshape long disregard, i(year ak hi) j(famsize)

* The column coded 13 is split into separate rows for sizes 1, 2 and 3:
* make three copies, number them within cell, and recode by copy number
tabulate famsize, missing
expand 3 if famsize == 13
bysort famsize year ak hi: generate n = _n
tabulate famsize n
forvalues k = 1/3 {
	replace famsize = `k' if n == `k' & famsize == 13
}
tabulate famsize, missing
drop n

* The column coded 6 is split the same way into sizes 6 through 20
expand 15 if famsize == 6
bysort famsize year ak hi: generate n = _n
tabulate famsize n
forvalues k = 1/15 {
	replace famsize = `k'+5 if n == `k' & famsize == 6
}
tabulate famsize, missing
drop n

tempfile disregard
save `disregard', replace
		
* Shelter and dependent-care deductions, then assemble the full SNAP rules
* file ./dta/snaprules.dta (one row per year, ak, hi and famsize).
* Open question: no limit on dependent care after 2009 and very low
* take-up before that -- how should this be handled?
import excel "./raw/QC/Maximum-Benefits.xlsx", firstrow clear ///
	sheet("Data_shelter") cellrange(A2)
drop E
rename shelter* shelter
rename child* child

* Flags for Alaska and Hawaii taken from the state text
tabulate state, missing
generate ak = regexm(state, "Alaska")
generate hi = regexm(state, "Hawaii")
drop state

* These deductions do not vary with family size, so each row here matches
* many family-size rows. Both merges must match on every row.
merge 1:m year ak hi using `disregard', assert(match) nogenerate
merge 1:1 year famsize ak hi using `max', assert(match) nogenerate

save "./dta/snaprules.dta", replace

* Maximum AFDC/TANF grants by state, year and family size (2 to 4).
* Only needs to be run once: adds max TANF grants for 2014 and 2015.
* NOTE(review): the result is saved under ./dta/, while the later merge of
* max-grant data reads ./raw/afdc_max_benefits_1980-2015.dta -- confirm
* which copy is meant to be used.
import excel "./raw/UKCPR_National_Welfare_Data_04122017.xlsx", clear firstrow ///
	sheet("Data")
rename _all, lower
keep year state_name afdctanfbenefit*

* state_name is used as the postal abbreviation key; attach FIPS codes and
* keep matched rows only (unmatched master rows would fail the assert)
rename state_name stabb
merge m:1 stabb using "./dta/xwalk_fipst.dta", assert(using match) ///
	keep(match) nogenerate

forvalues size = 2/4 {
	rename afdctanfbenefitfor`size' afdcG`size'
}

keep year stfips afdcG*
reshape long afdcG, i(year stfips) j(famsize)
* Check that every year and family size is present
tabulate year famsize, missing

save "./dta/afdc_max_benefits_1980-2015.dta", replace

****************************************************************************
*End aux data creation
****************************************************************************

*******
*Total expenditures for each program
*******

*********************PUBLIC HOUSING******************************
/*Pieces we need
“Annual contributions for assisted housing”
Public housing operations
“Housing certificate fund.” 
TBRA 
PBRA
*/
* Housing outlays: flag the accounts of interest, sum them by program type
* (housing or voucher) and produce one row per year with amthousing and
* amtvoucher. Output: tempfile housingamt.
import excel using "./raw/PublicHousing/outlays.xls", firstrow clear
rename _all, lower
* Agency code 025 is HUD
keep if agencycode == "025"
tabulate accountname, missing

* Program flags from the account name
generate voucher = regexm(accountname, "Annual contributions for assisted") | ///
	regexm(accountname, "Housing Certificate Fund") | ///
	regexm(accountname, "Project-based Rental") | ///
	regexm(accountname, "Tenant Based")
generate housing = regexm(accountname, "Public Housing Operating") | ///
	regexm(accountname, "Public Housing Capital")

* Housing assistance subfunction only
keep if subfunctioncode == "604"

* Check which accounts were picked up
tabulate accountname if voucher == 1
tabulate accountname if housing == 1
tabulate accountname, missing
keep if housing == 1 | voucher == 1

* Sum every outlay column within program type (grant and non-grant items)
collapse (sum) m-bv, by(housing voucher)

* Spreadsheet columns ap through bo hold 1990 through 2015, in order
local yr = 1990
foreach col in ap aq ar as at au av aw ax ay az ba bb bc bd be bf bg bh bi bj bk bl bm bn bo {
	rename `col' year`yr'
	local ++yr
}

* Text key for the reshape
generate accountname = "housing" if housing == 1
replace accountname = "voucher" if voucher == 1 & housing != 1
tabulate accountname, missing
tabulate accountname voucher, missing
tabulate accountname housing, missing
keep accountname year*

* Long on year, then wide on program
reshape long year, i(accountname)
rename year amt
rename _j year
reshape wide amt, i(year) j(accountname) string

tempfile housingamt
save `housingamt', replace
	
* Share of housing assistance going to families with kids, applied to the
* outlay amounts and converted with the 2014 index value.
* Output: tempfile housing.
import excel using "./raw/PublicHousing/outlays.xls", firstrow clear sheet("Recipients")
rename _all, lower
keep year pctvouchers* pcthousing*
drop if year == .

* Shares must lie in [0, 1]; a missing share also fails this check
foreach share of varlist pct* {
	assert inrange(`share', 0, 1)
}

* Every year must be present on both sides
merge 1:1 year using `housingamt', assert(match) nogenerate

* Program amount times share going to families with kids
foreach type in voucher housing {
	generate `type' = pct`type'*amt`type'
}
generate totpublic = housing + voucher

* Attach the price index; every year must find a match
merge 1:1 year using "./dta/cpiu.dta", keep(master match)
assert _merge == 3
drop _merge

* Index value for 2014, copied to every row
egen cpi2014 = max(cpiurs*(year == 2014))
tabulate cpi2014
generate r_totpublic = totpublic*(cpi2014/cpiurs)
* Rescale by 1,000,000.
* NOTE(review): the original comment called this a millions-to-billions
* conversion, but the divisor is 1,000,000 (the SSI section divides by
* 1,000 for millions-to-billions) -- confirm the units of the outlays file.
replace r_totpublic = r_totpublic/1000000

drop cpiurs

tempfile housing
save `housing', replace

*********************SSI*********************
* SSI totals, 1990-2015, rescaled with the 2014 index value.
* Output: tempfile ssi (year, r_totssi).
import excel using "./raw/annualreport_ssi_2016.xlsx", cellrange(A3) firstrow clear ///
	sheet("Sheet2")

* Column A is the year, column B the payments to children
rename A year
tabulate year, missing
rename B totssi
keep year totssi

* Years needed
keep if year >= 1990 & year < 2016

* Convert to 2014 dollars to match the other programs; every year must
* find an index value
merge 1:1 year using "./dta/cpiu.dta", keep(master match)
assert _merge == 3
drop _merge
egen cpi2014 = max(cpiurs*(year == 2014))
tabulate cpi2014
generate r_totssi = totssi*(cpi2014/cpiurs)
* Millions to billions
replace r_totssi = r_totssi/1000

keep year r_totssi

* Eyeball the series
tabulate year, summarize(r_totssi)

tempfile ssi
save `ssi', replace

*********************Medicaid*********************
* Step 1: expenditure series for the years after 2011 (not available from
* MSIS), rescaled with the 2015 index value. Output: tempfile kidexpend.
import excel using "./raw/Medicaid/Children's Medicaid expenditures.xlsx", firstrow ///
	clear sheet("forstata")
keep if year > 2011
merge 1:1 year using "./dta/cpiu.dta", keep(master match)
assert _merge == 3
drop _merge
* Index value for 2015
summarize cpiurs if year == 2015, detail
local cpi2015 = r(mean)
* NOTE(review): this series uses a 2015 base, and every r_ variable is
* multiplied by a 2015/2014 factor again when the programs are combined --
* confirm that r_totmcaid should receive that second adjustment.
generate r_expend = mcaid*(`cpi2015'/cpiurs)
keep year r_expend
tempfile kidexpend
save `kidexpend', replace

* Step 2: aggregate totals (spreadsheet column I), then stack the later
* years underneath and use them wherever the total is missing.
import excel "./raw/Medicaid/Mcaidtotals.xlsx", clear  ///
	firstrow cellrange(A2) sheet("Totkidexpend")
rename _all, lower
keep year i
rename i r_totmcaid

append using `kidexpend'
* Fill in years 2012-2015
replace r_totmcaid = r_expend if r_expend != . & r_totmcaid == .
* Eyeball the series
tabulate year, summarize(r_totmcaid)
drop r_expend

tabulate year, missing

tempfile totmcaid
save `totmcaid', replace
	
*********************SNAP, AFDC, CTC*********************
* One pass per sheet of the expenditure workbook. Each pass leaves a
* tempfile named tot plus the first two characters of the sheet key
* (totfs, totaf, totCT) holding year and the r_ variables.
foreach prog in fs "afdc-tanf" CTC {
	import excel using "./raw/AFDC-EITC-TANF-etc-Annual-Expenditures-update-090317.xlsx", ///
		clear firstrow sheet("data-`prog'")
	display "This is `prog'"
	* Two-character stub for the tempfile name
	local p = substr("`prog'", 1, 2)

	* Rows without a CPIU value are not data years
	drop if CPIU == .
	* Some sheets call the year column Taxyear; ignore the error otherwise
	capture rename Taxyear year
	* Drop note rows, then make year numeric
	drop if year == "" | regexm(year, "ctc")
	destring year, replace
	tabulate year, missing

	if "`prog'" == "fs" {
		rename Realtotalexpend r_totsnap
	}
	else if "`prog'" == "afdc-tanf" {
		rename RealCashexpend r_tottanf
	}
	else if "`prog'" == "CTC" {
		rename CTC r_totctc
		rename ACTC r_actc
		rename Nonref r_ctc

		* Millions to billions
		foreach amt of varlist r_* {
			replace `amt' = `amt'/1000
		}
	}

	* Keep only what is needed
	keep year r_*

	tempfile tot`p'
	save `tot`p'', replace
}
* EITC is laid out differently from the other sheets and has to be limited
* to the portion going to families with kids.

* (a) Amounts going to families with kids from the published SOI tables,
*     1996 onward. Output: tempfile eitc9615 (year, r_eitc).
import excel using "./raw/eitc_IRS_SOI_returns.xls", clear firstrow ///
	sheet("Amtfamwkids")
rename _all, lower
keep year ramt*
rename ramt* r_eitc
keep if year >= 1996
* No zero, negative or missing amounts allowed
assert r_eitc > 0 & r_eitc != .
tempfile eitc9615
save `eitc9615', replace

* (b) Share going to families with kids (pcteickid), kept for the years
*     before 1996 that the published tables do not cover.
*     Output: tempfile pctkid.
import excel using "./out/timeseries_nber.xlsx", firstrow clear ///
	sheet("pctkid")
tabulate year, summarize(pcteickid)
keep if year < 1996
tempfile pctkid
save `pctkid', replace

* EITC totals for the early years: read the data-eitc sheet from row 15
* without a header row, so columns keep their spreadsheet letters.
import excel using "./raw/AFDC-EITC-TANF-etc-Annual-Expenditures-update-090317.xlsx", ///
	clear sheet("data-eitc") cellrange(A15)
rename A year
* Drop blank rows and note rows, then make year numeric
drop if year == "" | regexm(year, "Source") | regexm(year, "Log") ///
	| regexm(year, "data")
destring year, replace
tabulate year, missing

* Only years present in the pctkid file (those before 1996) survive
merge 1:1 year using `pctkid', keep(match) nogenerate
tabulate year, missing

* Change 1/15/2018: these rows cover only the years before 1996; the SOI
* published tables by number of kids are used for 1996 and later.

* Column L times the share going to families with kids
rename L r_eitc
replace r_eitc = (r_eitc*pcteickid)
label variable r_eitc "EITC 2014 bil $"
keep year r_*

* Add 1996 onward
append using `eitc9615'

* Bring every program together, one row per year
merge 1:1 year using `ssi', nogenerate
merge 1:1 year using `totfs', nogenerate
merge 1:1 year using `totaf', nogenerate
merge 1:1 year using `totCT', nogenerate
merge 1:1 year using `totmcaid', nogenerate
merge 1:1 year using `housing', nogenerate

* Move every r_ variable from the 2014 to the 2015 price level.
* Source: https://data.bls.gov/pdq/SurveyOutputServlet
local cpi2014 = 236.736
local cpi2015 = 237.017

foreach v of varlist r_* {
	display "`v'"
	* Swap 2014 for 2015 in the variable label
	local lab : variable label `v'
	local lab : subinstr local lab "2014" "2015", all
	replace `v' = `v' * (`cpi2015'/`cpi2014')
	label variable `v' "`lab'"
	* Check the result
	describe `v'
}

* Save the compiled file
save "./dta/totexp_allprog.dta", replace
****************************************************************************
****************************************************************************
**Program data**
****************************************************************************
****************************************************************************
* Early-years QC data: clean, keep one record per unit, and impute a SNAP
* benefit for every unit. The simplified data are merged to the full
* household data later.
* ./raw/TANF/AFDCdata.do creates the input file.
* NOTE(review): the original header said 1990-1995, but the year filter
* below keeps 1990 through 1994.
use "./raw/AFDC9097.dta", clear

* Year: two-digit YY becomes a four-digit year
tabulate YY, missing
rename YY year
replace year = 1900 + year
tabulate year, missing

* State: drop codes above 56 (territories)
rename STATE stfips
tabulate stfips, missing
tabulate stfips, nolabel
drop if stfips > 56
tabulate stfips, missing

* Family size and composition: treat the case as the family unit and
* assume all members are non-elderly
rename NUMPER famsize
generate under652person = 0

* One observation per unit: the first record within unit, month, year and
* state after sorting on person id
sort UNIT_ID MM year stfips PER_ID
bysort UNIT_ID MM year stfips: generate unitcount = _n
keep if unitcount == 1
drop unitcount

* Years needed
keep if year >= 1990 & year < 1995
tabulate year, missing

* SNAP rules (assuming 100% take-up): attach by year, AK/HI flags and
* family size; every unit must find its rules
generate ak = (stfips == 2)
generate hi = (stfips == 15)
merge m:1 year ak hi famsize using "./dta/snaprules.dta", keep(master match)
assert _merge == 3
drop _merge

* HHS poverty guidelines; every unit must match
generate calyear = year
merge m:1 calyear ak hi using "./raw/pov_guidelines_1977_2015.dta", keep(master match)
assert _merge == 3
drop _merge
* Family-specific guideline: first person plus an increment per extra member
generate fplguideline = fpl1 + (famsize - 1)*(fpladdl)

/*SNAP formula is:
SNAP = 	0 if gross income > 130%FPL or 0 if net income > 100% FPL
	max(MaxBenefit-(NetIncome*30%),0) if gross income <= 130% FPL &
		net income <= 100% FPL

Gross Income=	(earned income) + AFDC
Net Income=	(Gross income)-(standard deduction)-(20%*earned income)-
	(excess shelter deduction)-(med and childcare deduction)
*/
* The QC amounts are monthly. Recode missing amounts to zero first.
foreach amt in WAGE SELFEM OTHERN ADCAMT {
	replace `amt' = 0 if `amt' == .
	summarize `amt', detail
}
generate snapgross = (WAGE + SELFEM + OTHERN + ADCAMT)
* Open question: how to treat the childcare deduction (no limit after 2009)
generate snapnet = snapgross - disregard - (0.2*(WAGE + SELFEM + OTHERN)) - shelter
replace snapnet = 0 if snapnet < 0
generate snapben = max(snapmax - (snapnet*0.3),0)
* Income limits: the annual guideline is divided by 12 to compare with the
* monthly amounts
replace snapben = 0 if snapgross > 1.3*(fplguideline/12)
replace snapben = 0 if snapnet > (fplguideline/12)

* Express the benefit as a share of the maximum so it can be projected
* forward onto later years
generate snapben_pctmax = snapben/snapmax
label variable snapben_pctmax "SNAP ben as pct of max ben"
	
* Share of AFDC benefit dollars going to units with and without earnings,
* by year, for the worker / non-worker split. Runs inside preserve/restore
* so the unit-level data are untouched. Output: tempfile afdcwork (year,
* pcttanfworker0, pcttanfworker1).
preserve
* Earnings were recoded above, so none may be missing
assert WAGE != . &  SELFEM != .
generate worker = (WAGE > 0 | SELFEM > 0)
* Unweighted check that the split looks reasonable
tabulate year worker, missing row
* Weighted benefit dollars by worker status, then the within-year share
collapse (sum) ADCAMT [aw = WGT], by(year worker)
egen totalADC = total(ADCAMT), by(year)
generate pcttanfworker = ADCAMT/totalADC
keep year worker pcttanfworker
reshape wide pcttanfworker, i(year) j(worker)
* Later years (1995 on) use the HHS "Characteristics of TANF Recipients"
* tables. The QC shares are stored here so they can be appended to those
* later years before the full time series is exported to Excel.
tempfile afdcwork
save `afdcwork', replace
restore

* Reduce the QC data to one row per state, year and family size, then
* stack the TANF household file underneath.
keep year stfips famsize WGT snapben_pctmax ak hi

* Weighted number of units and weighted mean SNAP share per cell
generate one = 1
collapse (sum) one (mean) snapben_pctmax  [iw = WGT], by(stfips famsize year ak hi)
rename one total

* Each family size as a percent of the state-year total
egen totaltotal = total(total), by(stfips year)
generate pct = (total/totaltotal)*100
* Must be between 0 and 100 (or missing)
assert pct >= 0 & (pct <= 100 | pct == .)
drop totaltotal

* Attach state names and abbreviations; every state must match
merge m:1 stfips using "./dta/xwalk_fipst.dta", keep(master match)
assert _merge == 3
drop _merge

* Stack the TANF household data
append using "./raw/tanfhh.dta"
* Fill in values still missing after the append (for example stfips) from
* the crosswalk, matching on state name, and keep matched rows only
merge m:1 state using "./dta/xwalk_fipst.dta", update
tabulate year _merge, missing
tabulate stfips _merge, missing
keep if _merge >= 3
drop _merge
	
* Carry the 1994 SNAP share forward: within state and family size, every
* year after 1994 receives the 1994 value.
egen snappct1994 = max((year == 1994)*snapben_pctmax), by(stfips famsize)
replace snapben_pctmax = snappct1994 if year > 1994
drop snappct1994

* Maximum grant data (afdcG).
* NOTE(review): this reads the ./raw/ copy; the aux-data section above
* saves a file of the same name under ./dta/ -- confirm which is intended.
merge 1:1 stfips year famsize using "./raw/afdc_max_benefits_1980-2015.dta", ///
	keep(master match) generate(merge2)
drop *merge*

* From the readme: afdcG is the combined variable
tabulate year, summarize(afdcG)
capture drop _merge

* SNAP rules for all years: recompute the AK/HI flags, then attach.
* Unmatched rows are allowed only for families larger than 20, which are
* then dropped.
replace ak = (stfips == 2 | stabb == "AK")
replace hi = (stfips == 15 | stabb == "HI")
merge m:1 year ak hi famsize using "./dta/snaprules.dta", keep(master match)
assert famsize > 20 if _merge == 1
drop if famsize >  20
drop _merge
generate snapben = (snapben_pctmax*snapmax)
tabulate year, summarize(snapben)

* State crosswalk; every row must match
merge m:1 stfips using  "./dta/xwalk_fipst.dta", keep(master match)
assert _merge == 3
drop _merge

* Keys for the poverty threshold merge: the under-65 indicator is 0 for
* families of one or two and -1 for three or more (assume no one is
* elderly); assume every TANF household has a single parent.
generate under652person = cond(famsize < 3, 0, -1)
generate kids = famsize -1

** SPM measure
generate calyear = year
* Renter threshold only (assume everyone rents); every row must match
merge m:1 calyear using "./raw/spmthresh14.dta", ///
	keep(master match) keepusing(spmrent)
assert _merge == 3
drop _merge

* Family-specific thresholds. Equivalence scales from Fox (2017):
*   One and two adults:  scale = (adults)^0.5
*   Single parents:      scale = (adults + 0.8*first child
*                                 + 0.5*other children)^0.7
*   All other families:  scale = (adults + 0.5*children)^0.7
* The raw threshold is for a 2-adult, 2-kid family; divide it back to a
* per-person level.
replace spmrent = spmrent/(2 + (2*0.5))
* TANF: all recipients are assumed to be single parents.
* NOTE(review): as coded, the 0.8 weight is applied to every member beyond
* the first (not only the first child), the 0.5 weight is added on top for
* members beyond the second, and no 0.7 exponent is applied -- confirm this
* is the intended simplification of the scale quoted above.
generate spmthreshold = spmrent + (0.8*spmrent)*(max(famsize-1,0)) + max((0.5*(famsize - 2)),0)*spmrent
* Assume no one works and no one receives EITC: annual grant plus SNAP
* over the SPM threshold
generate tanf_atti_spm = ((afdcG+snapben)*12)/spmthreshold
label variable tanf_atti_spm "ATT Income-poverty TANF recipients using SPM threshold"

** Official measure
* m:1 because several states share each year's thresholds
merge m:1 famsize calyear kids under652person ///
	using "./raw/pov_thresholds_1980_2015.dta", ///
	keep(master match)
* Small families must all have matched
tabulate year _merge
assert _merge >= 3 if famsize < 10
tabulate famsize _merge if afdcG == .
drop calyear
* Drop rows without a threshold or without an AFDC maximum (might
* eventually go back to the 1960s; for now kept to 1977)
drop if _merge == 1 | afdcG == .
drop _merge

rename threshold fpl

* Annual maximum grant relative to the official threshold, without and
* with SNAP
generate tanf_opm = (12*(afdcG))/fpl
label variable tanf_opm "AFDC/TANF to OPM ratio"
bysort year: summarize tanf_opm, detail
generate tanf_atti_opm = ((afdcG+snapben)*12)/fpl
label variable tanf_atti_opm "ATT Income-pov TANF recipients OPM thresh"
bysort year: summarize tanf_atti_opm, detail

* For each ratio, flag rows above 0.5; the flag is missing where the ratio
* equals missing (.).
foreach meas in opm atti_spm atti_opm {
	generate tanf_`meas'_ge50 = (tanf_`meas' > 0.5) if tanf_`meas' != .
	label variable tanf_`meas'_ge50 "TANF `meas' ratio ge 50pct"
	tabulate stfips year if tanf_`meas'_ge50 == 1
	* How much higher are these?
	summarize tanf_`meas' if tanf_`meas'_ge50 == 1
	* Where are they?
	tabulate stfips if tanf_`meas'_ge50 == 1
}

* About 20% of these are 1-person families.
* Number of family types above 0.5 (official measure) per state-year; n
* marks one row per state-year so each is counted once in the table
bysort stfips year: generate n = _n
egen numfam_ge50 = total(tanf_opm_ge50), by(stfips year)
tabulate stfips numfam_ge50 if n == 1, row

* Clear out helper variables ahead of the reshape to one row per
* state-year. The captured drops are skipped as a whole if any listed
* variable is absent.
drop n size under652person kids spmthreshold
capture drop _merge
capture drop spmrent numfam_ge50
capture drop snapben_pctmax disregard snapmax

* Keep family sizes up to 4 and go wide: one row per state-year with a
* column per family size.
* NOTE(review): the original comments spoke of families of up to 6 people,
* but the filter and the loops below use sizes up to 4.
keep if famsize <= 4
* kids can be dropped because all households are assumed single-parent
reshape wide afdc* tanf* pct fpl* total snapben, i(state stfips stabb year) j(famsize)

* Combined caseload percent for sizes 2 to 4 (size 1 is treated as
* child-only and left out); a size counts only when both its percent and
* its OPM ratio are present
generate pctu4 = 0
forvalues size = 2/4 {
	replace pctu4 = pctu4 + pct`size' if pct`size' != . & tanf_opm`size' != .
}
tabulate year, summarize(pctu4)

* How much of the caseload do sizes 2 to 4 cover?
summarize pctu4 if year >= 1990, detail

* Lower bound on the share of TANF dollars going to families with an SPM
* ratio strictly between 0 and 0.5: caseload-weighted over sizes 2 to 4
generate tanfcatatti_spm1 = 0
forvalues size = 2/4 {
	replace tanfcatatti_spm1 = tanfcatatti_spm1 + ///
		((tanf_atti_spm`size' < 0.5 & tanf_atti_spm`size' > 0)*(pct`size'/pctu4)) if pct`size' != .
}

* State-years without percents: set to 1 when all three sizes are in range
replace tanfcatatti_spm1 = 1 if (tanf_atti_spm2 < 0.5 & tanf_atti_spm2 > 0) & ///
	(tanf_atti_spm3 < 0.5 & tanf_atti_spm3 > 0) ///
	 & (tanf_atti_spm4 < 0.5 & tanf_atti_spm4 > 0)

summarize tanfcatatti_spm1, detail
bysort year: summarize tanfcatatti_spm1

* Attach state-year caseloads and expenditures.
rename stfips statefip
capture drop _merge

* Caseloads and recipients
merge m:1 statefip year using "./raw/annual_afdc_tanf_case.dta", ///
	keep(master match) nogenerate
* A missing 2015 value is filled with the state's 2014 value
foreach v in caseload recipients {
	egen `v'2014 = max(`v'*(year == 2014)), by(statefip)
	* Make sure every state serves someone
	replace `v'2014 = . if `v' == 0
	replace `v' = `v'2014 if year == 2015 & `v' == .
	drop `v'2014
}

* Expenditures, with the same 2014-to-2015 fill
merge m:1 statefip year using "./raw/annual_afdc_tanf_exp.dta", ///
	keep(master match) nogenerate
foreach v in expenditures cash {
	egen `v'2014 = max(`v'*(year == 2014)), by(statefip)
	replace `v' = `v'2014 if year == 2015 & `v' == .
	drop `v'2014
}

* A handful of states have negative cash expenditures; set those to 0
tabulate year if cash < 0
tabulate state if cash < 0
replace cash = 0 if cash < 0
* Caseload information is missing for HI, NM, ND, OR, PA, RI and MN in 1997
* (MN is entirely in the <= 50% bucket except 1-person families, possibly
* child-only cases). For each of these states the family-size percents are
* linearly interpolated from the neighbouring years and pctu4 is rebuilt.
* Oregon gets the same treatment for 2000 in the second loop.
*
* The helper variables end in _ip and are removed with "drop *_ip". The
* earlier pattern "*ip" also matched statefip and silently dropped it on
* the first pass.
foreach state in HAWAII "NEW MEXICO" "NORTH DAKOTA" OREGON MINNESOTA PENNSYLVANIA "RHODE ISLAND" {
	* Share of 3-person families over time in this state
	tabulate year if state == "`state'", summarize(pct3)
	* Interpolate over 1996-1998 and fill 1997 where a value was produced
	foreach var of varlist pct2 pct3 pct4 pctu4 {
		ipolate `var' year if year >= 1996 & year <= 1998 & state == "`state'", ///
			generate(`var'_ip)
		replace `var' = `var'_ip if year == 1997 & state == "`state'" & ///
			`var'_ip != .
	}
	* Check that this worked
	tabulate pct3_ip year if year >= 1996 & year <= 1998 & state == "`state'"
	tabulate pct3 year if year >= 1996 & year <= 1998 & state == "`state'"
	drop *_ip
	* Rebuild pctu4 for the filled year from the size-specific percents
	replace pctu4 = 0 if state == "`state'" & year == 1997
	forvalues n = 2/4 {
		replace pctu4 = pctu4 + pct`n' if state == "`state'" & ///
			year == 1997 & pct`n' != . & tanf_opm_ge50`n' != .
	}
	summarize pctu4 if state == "`state'" & year == 1997
}

foreach state in OREGON  {
	tabulate year if state == "`state'", summarize(pct3)
	* Interpolate over 1999-2001 and fill 2000 where a value was produced
	foreach var of varlist pct2 pct3 pct4 pctu4 {
		ipolate `var' year if year >= 1999 & year <= 2001 & state == "`state'", ///
			generate(`var'_ip)
		replace `var' = `var'_ip if year == 2000 & state == "`state'" & ///
			`var'_ip != .
	}
	* Check that this worked
	tabulate pct3_ip year if year >= 1999 & year <= 2001 & state == "`state'"
	tabulate pct3 year if year >= 1999 & year <= 2001 & state == "`state'"
	drop *_ip
	* Rebuild pctu4 for the filled year from the size-specific percents
	replace pctu4 = 0 if state == "`state'" & year == 2000
	forvalues n = 2/4 {
		replace pctu4 = pctu4 + pct`n' if state == "`state'" & ///
			year == 2000 & pct`n' != . & tanf_opm_ge50`n' != .
	}
	summarize pctu4 if state == "`state'" & year == 2000
}
		
	
* From 1990 on: split each state-year caseload into the share at or above
* and the share below half the SPM threshold.
drop if year < 1990

* Share above 0.5: sum over sizes 2 to 4 (child-only cases excluded),
* skipping sizes whose percent or flag is missing in that year
generate pctcaseloadatti_spm_ge50 = 0
forvalues size = 2/4 {
	replace pctcaseloadatti_spm_ge50 = pctcaseloadatti_spm_ge50 + ///
		(tanf_atti_spm_ge50`size' == 1)*(pct`size'/pctu4) if ///
		pct`size' != . & tanf_atti_spm_ge50`size' != .
}
* Fill in blanks: all three sizes flagged means the whole caseload
replace pctcaseloadatti_spm_ge50 = 1 if (tanf_atti_spm_ge502 == 1) & (tanf_atti_spm_ge503 == 1) &  ///
	(tanf_atti_spm_ge504 == 1)

* Same calculation from the other side, as a cross-check
generate pctcaseloadatti_spm_lt50 = 0
forvalues size = 2/4 {
	replace pctcaseloadatti_spm_lt50 = pctcaseloadatti_spm_lt50 + (pct`size'/pctu4) ///
		if tanf_atti_spm`size' < .5 & pct`size' != .
}
replace pctcaseloadatti_spm_lt50 = 1 if tanf_atti_spm2 < .5 & tanf_atti_spm3 < .5 & tanf_atti_spm4 < .5

* Caseload information is missing for MN and RI in 1997: list state-years
* after 1995 where neither pctu4 nor the sum of the two shares is available
capture drop one
generate one = pctcaseloadatti_spm_lt50 + pctcaseloadatti_spm_ge50
tabulate state year if pctu4 == . & one == . &  year > 1995
capture table state if pctu4 == . & one == . &  year > 1995, ///
	c(mean tanf_atti_spm_ge502 mean tanf_atti_spm_ge503 ///
	mean tanf_atti_spm_ge504)
drop one

* Minnesota 1997: assume all 1-person families are child-only cases, so
* when no size 2 to 4 is flagged the whole caseload is below 0.5
local mn1997 tanf_atti_spm_ge502 == 0 & tanf_atti_spm_ge503 == 0 & tanf_atti_spm_ge504 == 0 & year == 1997 & state == "MINNESOTA"
replace pctcaseloadatti_spm_ge50 = 0 if `mn1997'
replace pctcaseloadatti_spm_lt50 = 1 if `mn1997'

* The two shares must add to one, up to rounding error
capture drop one
generate one = pctcaseloadatti_spm_lt50 + pctcaseloadatti_spm_ge50
assert one >= .99 & one <= 1.01 if year >= 1990
	
* National TANF totals by poverty bin, exported to the TANF sheet of
* ./out/timeseries.xlsx.

* Cash dollars below and at or above half the SPM threshold. The three
* higher bins are zero placeholders so every program has the same columns.
generate cashatti_spm_lt50 = cash*pctcaseloadatti_spm_lt50
generate cashatti_spm_ge50 = cash*(1-pctcaseloadatti_spm_lt50)
generate cashatti_spm_100_150 = 0
generate cashatti_spm_150_200 = 0
generate cashatti_spm_ge200 = 0

* National totals per year
collapse (sum) cash*, by(year)

* Share of cash in each of the five bins
local denom5 (cashatti_spm_lt50 + cashatti_spm_ge50 + cashatti_spm_100_150 + cashatti_spm_150_200 + cashatti_spm_ge200)
local k = 0
foreach b in lt50 ge50 100_150 150_200 ge200 {
	local ++k
	generate pcttanfatti_spm`k' = cashatti_spm_`b'/`denom5'
}
* Eyeball each share
forvalues n = 1/5 {
	tabulate year, summarize(pcttanfatti_spm`n')
}

* Shares conditional on being below 200% (first four bins only).
* These u200_ variables are not retained by the keep further down.
local denom4 (cashatti_spm_lt50 + cashatti_spm_ge50 + cashatti_spm_100_150 + cashatti_spm_150_200)
local k = 0
foreach b in lt50 ge50 100_150 150_200 {
	local ++k
	generate u200_pcttanfatti_spm`k' = cashatti_spm_`b'/`denom4'
}
* Eyeball the conditional shares. The earlier version of this loop
* tabulated the unconditional pcttanfatti_spm variables a second time.
forvalues n = 1/4 {
	tabulate year, summarize(u200_pcttanfatti_spm`n')
}

* National aggregate TANF total, split across the bins
merge 1:1 year using "./dta/totexp_allprog.dta", keep(match) nogenerate ///
	keepusing(r_tottanf)
foreach bin of varlist pcttanfatti_spm* {
	generate tot`bin' = `bin'*r_tottanf
}

* Annual totals from 1990 on
keep if year >= 1990
keep year pct* tot*
sort year
export excel using "./out/timeseries.xlsx", sheet(TANF) sheetreplace ///
	firstrow(variable)

*TANF by work status: read the share of recipients working from the US TOTALS sheet
import excel using "./raw/TANFwork.xlsx", sheet("US TOTALS") firstrow clear
	tab year, su(pctworking)
	drop avgearnings //not using this
	*Assume that workers and non-workers get the same TANF grant
	rename pctworking pcttanfworker1

	/*Don't have 1997-1999; only have # of adults working. Table links broken
	for 1998, so have to interpolate this year anyway
	Assume: Child-only cases: no one works. All other hh have 1 adult member.
	*/
	*Linear interpolation over 1997-1999; only a missing value strictly between
	*1997 and 1999 is filled in
	ipolate pcttanfworker1 year if inrange(year, 1997, 1999), g(worker_ip)
	replace pcttanfworker1 = worker_ip if pcttanfworker1 == . & ///
		year > 1997 & year < 1999
	drop worker_ip

	*Convert percent to a 0-1 share and make sure it is in range
	replace pcttanfworker1 = pcttanfworker1/100
	assert inrange(pcttanfworker1, 0, 1)
	*Share of non-workers is the complement
	g pcttanfworker0 = 1-pcttanfworker1
	assert inrange(pcttanfworker0, 0, 1)

*Add the rows held in the afdcwork tempfile
append using `afdcwork'

*Merge to annual cash
merge 1:1 year using "./dta/totexp_allprog.dta", keep(match) nogen ///
	keepusing(r_tottanf)
*Split the annual total between non-workers (0) and workers (1)
g r_tottanf0 = r_tottanf*pcttanfworker0
g r_tottanf1 = r_tottanf*pcttanfworker1
drop r_tottanf
	
export excel using "./out/timeseries_workers.xlsx", sheet("TANF") ///
	firstrow(variable) sheetreplace

****************************************************************************
*Refundables: EITC/CTC
****************************************************************************
*First create timeseries_nber.xlsx using the NBER public use files
import excel using "./out/timeseries_nber.xlsx", firstrow clear ///
	sheet("EITCatti_spm")
*Harmonize variable names (the order of these renames matters)
rename *eic_d* *eic*
rename pctctc* pctactcctc* //both refundable and non-refundable
rename *chtcr* *ctc*
rename *addcrd* *actc*

*Loop over all credits: EITC and combined A/CTC
foreach cred in eic actcctc  {
	*Label for the credit; the expenditure variable name is derived from it
	if "`cred'" == "eic" local lab = "EITC"
	if "`cred'" == "actcctc" local lab = "ACTC and CTC"
	
preserve
	
	*Dollar amounts: "eitc" for the EITC, "totctc" for the combined A/CTC
	local prog = subinstr(lower("`lab'"), "actc and ", "tot", .) //fix CTC labeling
	di "`prog' dollar amounts" //make sure this worked
		
	*Merge to national agg totals
	merge 1:1 year using "./dta/totexp_allprog.dta", keepusing(r_`prog')
	*Make sure have data for all years (no row may be master-only)
	assert _merge >= 2 
	keep if year >= 1990
	drop _merge

	*Forecast: carry the 2011 shares forward to 2012 and later
	foreach bin of var pct`cred'atti_spm* {
		cap drop `bin'2011
		cap drop hat`bin'
		*Spread the 2011 value of this share to every row
		egen `bin'2011 = max(`bin'*(year == 2011))
		g hat`bin' = cond(year >= 2012 & year != ., `bin'2011, `bin')
	}
	cap drop *2011

	*National total in each bin (adjusted for % going to families w/ kids),
	*using observed shares (tot*) and forecast shares (hattot*)
	foreach bin of var pct`cred'atti_spm* { 
		g tot`bin' = `bin'*r_`prog'
		g hattot`bin' = hat`bin'*r_`prog'
	}

	*Get annual totals; fill in missings before 1998 with zero
	*NOTE(review): only tot* is filled here, while hattot* is what gets exported below
	forvalues p = 1/5 {
		tab year, su(totpct`cred'atti_spm`p')
		replace totpct`cred'atti_spm`p' = 0 if year < 1998 & totpct`cred'atti_spm`p' == .
	}
	
	*Shares conditional on income < 200% poverty, for forecast and observed shares
	foreach hat in hat "" {
		local stub `hat'pct`cred'atti_spm
		local under200 `stub'1 + `stub'2 + `stub'3 + `stub'4
		forvalues k = 1/4 {
			g u200_`stub'`k' =  `stub'`k'/ ///
				(`under200')
		}
	}

	*Export to excel: forecast-based dollar amounts only, through 2015
	keep if year <= 2015 
	keep hattotpct`cred'* year
	rename hattotpct`cred'* `cred'*
	sort year
	export excel using "./out/timeseries.xlsx", sheet(`cred'atti_spmamt) sheetreplace ///
	firstrow(variable)

	restore
	} //end credit loop
	

****************************************************************************
*SNAP
****************************************************************************
/*Quality Control files available through Mathematica:
https://host76.mathematica-mpr.com/fns/
Download and save them in the directory used below
*/
clear
*Before 1996-- already in dta form
forvalues y = 90/94 {
	use "./raw/QC/old_qc_data/qcfy`y'.dta", clear
	di "This is SNAP in 19`y'"

	*Harmonize names with later years. Only the 1990-93 files are renamed; the
	*1994 file is left as is (presumably it already uses the later names)
	if `y' < 94 {
		rename gross fsgrinc
		rename bonus fsben 
		rename kidcount fsnkid //make sure consistent w/ later years
		rename area statefip
		rename hhsize fsusize
		rename hherngs fsearn
		rename hhunemp fsunemp
		rename hhwcomp fswcomp
		rename hhafdc fsafdc
		rename hhssi fsssi
		rename hhga fsga
		rename dcare fsdepded
		rename hheitc fseitc
		rename weight hwgt
	}
	*Captured: fails harmlessly when state is absent or statefip already exists
	cap rename state statefip
	destring fsben fsnkid fsusize, replace

	*Before dropping hh without kids, calculate % of benefits going to hh w/ kids
	egen totben = total(fsben)
	egen kidben = total(fsben*(fsnkid > 0 & fsnkid != .))
	g pct2hhwkids = kidben/totben
		lab var pct2hhwkids "Pct SNAP going to hh w kids"

	*Just keep families w/ kids. Missing compares as larger than any number in
	*Stata, so exclude it explicitly (same rule as kidben above)
	tab fsnkid, mi
	keep if fsnkid > 0 & fsnkid != .

	*Rename SNAP unit size for merging
	g famsize = fsusize
	*Create year variable (file suffix is a two-digit year)
	g year = 1900 + `y'
	*Destring a couple variables we need
	cap rename AGE*, lower
	destring age*, replace
	*Save tempfile
	tempfile snap`y'
		save `snap`y'', replace
	
	}

*1996 onward: each year ships as a zip that is unpacked into the working directory
forvalues y = 1996/2015 {
	unzipfile "./raw/QC/qcfy`y'_st.zip", replace
	di "This is SNAP in `y'"
	use qc_pub_fy`y'.dta, clear

	*Before dropping hh without kids, calculate % of benefits going to hh w/ kids
	egen totben = total(fsben)
	egen kidben = total(fsben*(fsnkid > 0 & fsnkid != .))
	g pct2hhwkids = kidben/totben
		lab var pct2hhwkids "Pct SNAP going to hh w kids"

	*Just keep families w/ kids. Missing compares as larger than any number in
	*Stata, so exclude it explicitly (same rule as kidben above)
	tab fsnkid, mi
	keep if fsnkid > 0 & fsnkid != .

	*Rename SNAP unit size for merging
	g famsize = fsusize
	*Create year variable
	g year = `y'
	*Destring a couple variables we need
	cap rename AGE*, lower
	destring age*, replace
	*Save tempfile
	tempfile snap`y'
		save `snap`y'', replace
	
	}

	
*Stack every SNAP QC year into one file: start from 1996, add 1997-2015, then
*the old QC files for 1990-1994 (already in stata form)
use `snap1996', clear
foreach y of numlist 1997/2015 90/94 {
	append using `snap`y'', force 
	}

*Make sure no missing years
count if year == .
assert r(N) == 0
tab year, mi //make sure all years are here
	
*Merge to poverty data
rename fsnkid kids
*All hh non-elderly (ow wouldn't have kids)
egen agemax = rowmax(age1* age2 age3 age4 age5 age6 age7 age8 age9)
	sum agemax, d
	tab year, su(agemax)
	*Merge key for the poverty thresholds file: -1 when famsize >= 3, else 0
	g under652person = cond(famsize >= 3, -1, 0)
	tab under652person, mi

*A couple of additional age cutoffs for tax credit calculation
foreach age in 13 17 18 { //loop through all age cutoffs
	g numage`age' = 0
	forvalues n = 1/16 { //loop through all ppl in the hh
		*A missing age compares as larger than the cutoff and adds nothing
		replace numage`age' = numage`age' + (age`n' <= `age')
		}
	tab numage`age', mi
	*NOTE(review): the count is already 0 when every age is missing, so this
	*line changes nothing -- confirm whether missing (.) was intended
	replace numage`age' = 0 if agemax == . //hh where all ages are missing
	}
*Assume all kids are eligible for the CTC and EITC
*NOTE(review): numage17/numage18 are never missing at this point, so these two
*lines never fire -- confirm the intent together with the line flagged above
replace numage17 = kids if numage17 == .
replace numage18 = kids if numage18 == .

*Merge to SPM (just keep what we need)
g calyear = year
merge m:1 calyear using "./raw/spmthresh14.dta", keep(master match) keepusing(spmrent) 
*Make sure have info for all years
assert _merge >= 3
	drop _merge

*Calculate family-specific thresholds
/*From Fox, 2017:
https://www.census.gov/content/dam/Census/library/publications/2017/demo/p60-261.pdf
One and two adults: scale = (adults)^0.5
Single parents: scale = (adults + 0.8 * first child + 0.5 * other children)
All other families: scale = (adults + 0.5 * children).
NOTE(review): the published scale is believed to raise the last two sums to a
power as well; the code below uses the linear sums with no exponent -- confirm
this simplification is intended.
*/
*SPM version in the raw data file is for 2 adult, 2 kid family -- adjust so back at per-person level
replace spmrent = spmrent/(2 + (2*0.5))
local adults (famsize - kids)
*Single parent, one kid
g spmthreshold = (1 + 0.8)*spmrent if famsize == 2 & kids == 1
	*Single parent, multiple kids (also re-assigns the one-kid case to the same value)
	replace spmthreshold = (1+0.8+ 0.5*(famsize - 2))*spmrent if `adults' == 1
	*Multiple adult hh
	replace spmthreshold = (`adults' + 0.5*kids)*spmrent if `adults' > 1
*NOTE: spmthreshold stays missing when famsize - kids is 0 or negative

*Official poverty thresholds by year, family size, kids and the under-65 key
merge m:1 calyear famsize kids under652person ///
	using "./raw/pov_thresholds_1980_2015.dta", keep(master match)
*Not all large families merged, so keep matches rather than asserting
*assert _merge >= 3
keep if _merge == 3
drop _merge calyear //drop intermediate vars
tab year, mi //make sure all years are here

*Create poverty ratio
rename threshold fpl

*Calculate gross income (net of EITC) as a ratio to the poverty threshold
*All income amounts are monthly $, so annualize before dividing
tab year, su(fsgrinc)
replace fseitc = 0 if fseitc == . //EITC not included in QC files 1998-2006
g fsuinc_opm = 12*(fsgrinc - fseitc)/fpl
	lab var fsuinc_opm "FSU inc OPM ratio"
sum fsuinc_opm
	
*Other income sources to create SPM poverty line
*SPMu_totval = fsgrinc (includes cash assistance and other ben)
*SPMu_SNAPSub = fsben
*SPMu_CapHouseSub **NOT IN QC
*SPMu_SchLunch **NOT IN QC
*SPMu_EngVal = fsenergy
*SPMu_WICval **NOT IN QC 
*SPMu_ChildSupPd = fscsuprt  
*SPMu_CapWknCh - ?? 
*SPMu_MedOOPnMCareB = fsmedexp 
*Use TAXSIM to get the following:
*SPMu_FedTax + 
*ferp_val - 
*SPMu_FICA - 
*SPMu_stTax -

*Make sure have weights for all years: show missings by year, then require none
g test = (hwgt == .)
tab year test
assert hwgt != .
drop test
	
*Just keep var we need
keep year state famsize kids under652person numage* hwgt pct2* ///
	fpl spmthreshold fsuinc_opm ///
	fsgrinc fsearn fsslfemp fssocsec fsunemp fswcomp realprop ///
	fsben fsenergy fseitc fsafdc fstanf fsga fsssi ///
	fscsuprt fsmedexp fsdepded rent
	
*Indicator for whether have earnings
*Make sure have all logical values and no-one is missing
assert fsearn != .
*Look at distribution over years to make sure this looks reasonable
tab year [aw = hwgt], su(fsearn) nofreq
g worker = fsearn > 0
tab year worker [aw = hwgt], mi row

*Monthly benefit amounts going to each worker group
tab year worker [aw = hwgt], su(fsben)
	
*Run through TAXSIM
*Builds the household-level inputs listed in $taxsim and saves the working file.
*NOTE(review): some names created below (taxsim_id, interest, other, realprop)
*differ from the item names quoted in the numbered comments (taxsimid, intrec,
*...), and nonprop is never created -- confirm against the TAXSIM input list
*that every variable is read as intended
*1.taxsimid Case ID (arbitrary, but must be a non-negative numeric)
g taxsim_id = _n
*2. year: Tax year ending Dec 31
tab year, mi
*3. state (SOI codes)
*Bring in state SOI codes:
rename state statefip
*keep(master match): never append states that appear only in the crosswalk,
*which would add empty rows with a missing taxsim_id
merge m:1 statefip using "./raw/stateabb.dta", keepusing(state_soi) ///
	keep(master match)
	tab statefip _merge if _merge != 3 //make sure this looks ok
	rename state_soi state
/*4. mstat Marital Status
	1. single or head of household (unmarried)
	2. joint (married)
	6. separate (married). 
	8. Dependent taxpayer. (Typically child with income).*/
g mstat = 1 
	*NOTE(review): the condition is > 2 (three or more adults) while the stated
	*assumption is 2+ adults -- confirm which one is intended
	replace mstat = 2 if (famsize - kids) > 2 //assume all hh with 2 + adults are MFJ
*5. page Age of primary taxpayer (or zero). 
	g page = 0 //assume this is 0 for all
*6. sage Age of spouse (or zero).
	g sage = 0
		*NOTE(review): under652person is only ever set to -1 or 0 earlier in this
		*file, so this replace presumably never fires -- confirm
		replace sage = 1 if under652person == 1
*7. depx Number of dependents (Personal Exemptions).
	tab kids, mi
	rename kids depx
*8. dep13 Number of children under 13 (Dependent Care Credit).
	tab numage13, mi
	g dep13 = min(depx, numage13)
*9. dep17 Number of children under 17 (Child Credit).
	tab numage17, mi
	g dep17 = min(depx, numage17)
*10. dep18 Number of children under 18 (EITC).
	tab numage18, mi
	g dep18 = min(depx, numage18)
*11. pwages Wage and salary income of Primary Taxpayer (include self-employment).
	*(Doesn't really matter, lump everything at hh level)
	sum fsearn, d
	*Check that this includes self-employment
	assert fsearn >= fsslfemp if fsslfemp != .
	g pwages = (fsearn *12) //create annual amount
*12. swages Wage and salary income of Spouse (include self-employment).
	g swages = 0
*13. dividends Dividend income (qualified dividends only for 2003 on).
	*Not in QC
	g dividends = 0
*14. intrec Interest Received (+/-)
	*Not in QC
	g interest = 0
*15. stcg Short Term Capital Gains or losses. (+/-)
*16. ltcg Long Term Capital Gains or losses. (+/-)
foreach term in st lt {
	g `term'cg = 0
	}
*17. otherprop Other property income
	*realprop is only summarized here; otherprop itself is set to 0
	sum realprop, d
	g otherprop = 0
*18. nonprop Other non-property income (this is mostly alimony and for itemizers)
	*Not created
*19. pensions Taxable Pensions and IRA distributions
	*Not in QC
	g pensions = 0
*20. gssi Gross Social Security Benefits
	sum fssocsec, d
	g gssi =  fssocsec*12 //annual amount
*21. ui Unemployment compensation received.
	egen ui = rowtotal(fsunemp fswcomp)
		replace ui = ui*12 //annual amount
*22. transfers Other non-taxable transfer income
	egen transfers = rowtotal(fsben fsenergy fsafdc fstanf fsga fsssi)
		replace transfers = transfers*12 //annual amount
*23. rentpaid Rent Paid (used only for calculating state property tax rebates)
	*NOTE(review): left as reported, not multiplied by 12 like the items above
	sum rent, d
		replace rent = 0 if rent == .
	rename rent rentpaid
*24. proptax Real Estate taxes paid. 
	g proptax = 0
*25 other item (mostly for itemizers)
	g other = 0
*26. childcare Child care expenses.
	*NOTE(review): left as reported, not multiplied by 12 like the items above
	sum fsdepded, d
	rename fsdepded childcare
*27. mortgage Deductions not included in item 16 and not a preference for the AMT
	*Lumped in with rent in the QC: Unsure how to disentangle. Set as 0 for now
	g mortgage = 0

*Create global for taxsim variables so easy to filter through
*rentpaid is spelled out: the old entry "rent" only resolved to it through
*variable-name abbreviation
global taxsim taxsim_id year state mstat page sage depx dep13 dep17 ///
	dep18 pwages swages dividends interest stcg ltcg realprop otherprop ///
	pensions gssi ui transfers rentpaid proptax other childcare mortgage
	
*Make sure things look ok
summ $taxsim
	
	
compress
save "./temp/snapqc.dta", replace

*Comment this out if don't need to run taxsim

preserve
	*Send only the TAXSIM inputs, in the order listed in $taxsim
	keep $taxsim
	order $taxsim
	*Set all missings to 0. missing() covers the system missing (.) and every
	*extended missing value (.a-.z) in one pass; the previous letter-by-letter
	*list skipped .g
	foreach var of varlist _all {
		replace `var' = 0 if missing(`var')
	}
	compress
	save "./temp/taxsimdata.dta", replace //for debugging
	taxsim27, full replace
restore


*Compute SPM resources
*Merge main file with SOI
use "./temp/snapqc.dta", clear
cap drop _merge
	merge 1:1 taxsim_id using "taxsim_out.dta", assert(match) //make sure everyone matched
*Just keep what we need for SPM resources (ACTC and refund EITC included in fiitax --
*negative means received net refund)
drop v*

*Generate SPM resources
*For consistency with other assistance programs, define resources as gross income minus taxes
*+ SNAP benefits
g spmresources = (fsgrinc-fseitc + fsben)*12 - fiitax - siitax - fica/2

tab year [aw = hwgt], su(spmresources )

*Resources-to-threshold ratio and income bins, once per threshold.
*Each pass now sets its own suffix, threshold variable and label text. Before,
*the suffix was set only for spmthresh, so the fpl pass used whatever value was
*left in the macro (variables fsuinc_ and bin when it was empty) and labelled
*its bins "SPM". The spmthresh pass is unchanged: fsuinc_atti_spm, atti_spmbin.
foreach thresh in fpl spmthresh {
	if "`thresh'" == "fpl" {
		local t "atti_opm"
		local threshvar fpl
		local tname "FPL"
	}
	else {
		local t "atti_spm"
		local threshvar spmthreshold //full name, not the abbreviation spmthresh
		local tname "SPM"
	}
	g fsuinc_`t' = spmresources/`threshvar'
	sum fsuinc_`t'
		lab var fsuinc_`t' "ATT FSU inc `thresh' ratio"

	*Create income bins
	/*<50%, 50-100%, 100-150%, 150-200%, 200%+ of the threshold */
	*NOTE(review): a missing ratio stays in bin 0 (unlabelled) -- confirm intended
	g `t'bin = (fsuinc_`t' < .5)
		replace `t'bin = 2 if (fsuinc_`t' >= .5 & fsuinc_`t' < 1)
		replace `t'bin = 3 if (fsuinc_`t' >= 1 & fsuinc_`t' < 1.5)
		replace `t'bin = 4 if (fsuinc_`t' >= 1.5 & fsuinc_`t' < 2)
		replace `t'bin = 5 if (fsuinc_`t' >= 2 & fsuinc_`t' != .)
	tab year `t'bin [iw = hwgt], mi //make sure everything looks ok
	*Label values
	lab def `t'bin 1 "< 50% `tname'" 2 "50-100% `tname'" 3 "100-150% `tname'" ///
			4 "150-200% `tname'" 5 "200%+ `tname'"
		lab values `t'bin `t'bin
	}

compress
save "./dta/snapqc.dta", replace
	*Delete temporary file
	cap erase "./temp/snapqc.dta"
	
*Check the recent increase going to families < 50% FPL
*Average benefit level
	tab year atti_spmbin [aw = hwgt], su(fsben)
*Number of hh in each bin
	tab year atti_spmbin [iw = hwgt], row

*Export the fraction going to each income bucket into excel doc
preserve
	*Weighted benefit total and mean share to hh w/ kids, per year and poverty bin
	collapse  (sum) fsben (mean) pct2hhwkids [aw = hwgt], by(year atti_spmbin)

	*Share of each year's SNAP (to hh with kids) falling in each bin
	egen totalfsben = total(fsben), by(year)
	g pctfs = fsben/totalfsben
	sum pctfs, d
	*Make sure all values are logical (strictly between 0 and 1)
	assert pctfs < 1 & pctfs >0

	*One row per year, one column per bin
	reshape wide pctfs fsben , i(year) j(atti_spmbin)
		
	*Limiting to those < 200%
	local under200 pctfs1 + pctfs2 + pctfs3 + pctfs4
	forvalues n= 1/4 {
		g u200_pctfs`n' = pctfs`n' /(`under200')
		}				
		
	*Finally merge to aggregate expenditure data to get $ amounts
	*(just keep years we have QC files for)
	merge 1:1 year using "./dta/totexp_allprog.dta", keep(match) nogen ///
		keepusing(r_totsnap)
	
	*Disabled alternative: hold the hh-with-kids share at its 1996 value
	*egen pct2hhwkids96 = max(pct2hhwkids*(year == 1996))
	*Dollars in each bin = bin share x national total x share going to hh w/ kids
	forvalues b = 1/5 {
		g totfs`b' = (pctfs`b'*r_totsnap*pct2hhwkids)
		}
			
	*Excel output (the u200_* shares are not kept)
	keep year pctfs* totfs*
	sort year
	export excel using "./out/timeseries.xlsx", sheet("SNAPatti_spm") sheetreplace ///
		firstrow(variable)
restore
		
*SNAP by work status
*Same steps as the poverty-bin export, split by the worker indicator instead
preserve
	*Weighted benefit total and mean share to hh w/ kids, per year and worker group
	collapse  (sum) fsben (mean) pct2hhwkids [aw = hwgt], by(year worker)

	*Share of each year's SNAP (to hh with kids) going to each worker group
	egen totalfsben = total(fsben), by(year)
	g pctfs = fsben/totalfsben
	sum pctfs, d
	*Make sure all values are logical (strictly between 0 and 1)
	assert pctfs < 1 & pctfs >0
	
	*Drop food stamp amount from QC files to reduce confusion
	drop *fsben*
	reshape wide pctfs, i(year) j(worker)
	lab var pctfs0 "Pct SNAP to non-working hh"
	lab var pctfs1 "Pct SNAP to working hh"
	lab var pct2hhwkids "Pct all SNAP to hh w kids"
	sort year

	*Finally merge to aggregate expenditure data to get $ amounts
	*(just keep years we have QC files for)
	merge 1:1 year using "./dta/totexp_allprog.dta", keep(match) nogen ///
		keepusing(r_totsnap)
	tab year, mi
	
	*Disabled alternative: hold the hh-with-kids share at its 1996 value
	*egen pct2hhwkids96 = max(pct2hhwkids*(year == 1996))
	*Dollars to each group = group share x national total x share going to hh w/ kids
	forvalues s = 0/1 {
		local w ""
		if `s' == 0 local w "non"
		g totworkerfs`s' = (pctfs`s'*r_totsnap*pct2hhwkids)
		lab var totworkerfs`s' "Total SNAP to `w' workers"
		}	
		
	*Excel output
	keep year pct* tot*
	drop pct2hhwkids
	sort year
	export excel using "./out/timeseries_workers.xlsx", sheet("SNAP") sheetreplace ///
		firstrow(variable)
restore
	
*Delete inflated files left behind by unzipfile
forvalues y = 1996/2015 {
	foreach ext in csv dta {
		cap erase "qc_pub_fy`y'.`ext'"
		}
	}


****************************************************************************
*Medicaid, SSI, and public housing
****************************************************************************	
*Bring in CPS data and create shares going to each income: poverty bin
use "./dta/cps_BPEA.dta", clear
cap drop year
	g year = calyear //for consistency with other data
	drop if year < 1989

rename h_inc_alt1a_spm inc_atti_spm
	
*Create income bins
/*<50%, 50-100%, 100-150%, 150-200%, 200%+ of the SPM threshold */
g atti_spmbin = (inc_atti_spm < .5)
	replace atti_spmbin = 2 if (inc_atti_spm >= .5 & inc_atti_spm < 1)
	replace atti_spmbin = 3 if (inc_atti_spm >= 1 & inc_atti_spm < 1.5)
	replace atti_spmbin = 4 if (inc_atti_spm >= 1.5 & inc_atti_spm <2)
	replace atti_spmbin = 5 if (inc_atti_spm >= 2 & inc_atti_spm != .)
	*A missing ratio fails every test above and is left at 0
	replace atti_spmbin = . if atti_spmbin == 0 //missing values
tab year atti_spmbin, mi //make sure everything looks ok
*Label values
*The labels used to interpolate `thresh', a foreach loop macro from the SNAP
*section that is no longer defined here, so they came out as "< 50% " etc.
lab def atti_spmbin 1 "< 50% SPM" 2 "50-100% SPM" 3 "100-150% SPM" ///
		4 "150-200% SPM" 5 "200%+"
	lab values atti_spmbin atti_spmbin
	
*Loop over programs 
*For each CPS program variable h_<prog>: compute the share of the weighted total
*in each poverty bin and in each worker group, scale by the national total
*r_tot<p>, and export one sheet per split
foreach prog in numkidcaid ssival public {
	*Short name used in output variable names, the r_tot* total and sheet names
	if "`prog'" == "numkidcaid" local p = "mcaid"
	if "`prog'" == "ssival" local p = "ssi"
	if "`prog'" == "public" local p = "public"
	
preserve
*Total in each poverty bin
collapse  (sum) h_`prog' [aw = h_weight], by(year atti_spmbin)

*Leave hh without a poverty bin out of the shares
drop if atti_spmbin == .

*Share of each year's program total falling in each bin
egen total`prog' = total(h_`prog'), by(year)
g pct`p' = h_`prog'/total`prog'
	sum pct`p', d
	*Make sure all values are logical
	assert pct`p' >=0 & (pct`p' <= 1 | pct`p' == .)

reshape wide pct`p' h_`prog', i(year) j(atti_spmbin)
sort year
		
*Finally merge to aggregate expenditure data to get $ amounts
merge 1:1 year using "./dta/totexp_allprog.dta", keep(master match) nogen ///
	keepusing(r_tot`p') 

*Dollars in each bin = bin share x national total
foreach bin in `p'1 `p'2 `p'3 `p'4 `p'5 {
	g tot`bin' = (pct`bin'*r_tot`p')
	}

*Excel output
keep year pct`p'* tot`p'*
sort year
export excel using "./out/timeseries.xlsx", sheet("`p'atti_spm") sheetreplace ///
	firstrow(variable)
	
restore

*By work status
preserve
*Total in each worker group
collapse  (sum) h_`prog' [aw = h_weight], by(year worker)

*Leave hh with missing work status out of the shares. This used to happen after
*the shares were computed, so their amounts stayed in the denominator and the
*two shares did not have to sum to 1 (the poverty-bin split drops first)
drop if worker == .

*Share of each year's program total going to each worker group
egen total`prog' = total(h_`prog'), by(year)
g pct`p' = h_`prog'/total`prog'
	sum pct`p', d
	*Make sure all values are logical
	assert pct`p' >=0 & (pct`p' <= 1 | pct`p'== .)

reshape wide pct`p' h_`prog', i(year) j(worker)
sort year
		
*Finally merge to aggregate expenditure data to get $ amounts
merge 1:1 year using "./dta/totexp_allprog.dta", keep(master match) nogen ///
	keepusing(r_tot`p') 

*Dollars to each group = group share x national total
foreach bin in `p'0 `p'1 {
	g tot`bin' = (pct`bin'*r_tot`p')
	}

*Excel output
*NOTE(review): this sheet goes to timeseries.xlsx, while the TANF and SNAP worker
*splits are written to timeseries_workers.xlsx -- confirm this is intended
keep year pct`p'* tot`p'*
sort year
export excel using "./out/timeseries.xlsx", sheet("`p'work") sheetreplace ///
	firstrow(variable)
restore
} //end program loop

cap log close

