* Session setup: do not pause output for --more--, and start with no data in memory.
set more off
clear

****************************************************************************
****************************************************************************
**    INCOME POVERTY - AGGREGATE

** John W. McArthur & Krista Rasmussen
** January 2017
****************************************************************************
****************************************************************************

****************************************************************************
*A. Create sub-categories out of master dataset
*B. Rate of progress calculations
*C. Identify difference in rate of progress before/after MDGs
*D. Use rate of progress to generate counterfactuals for 2003-2015
*E. Lives affected using counterfactuals
*F. Reshape for Figures 4.19 + 4.20
****************************************************************************

**************************************************************
**************************************************************
 * A * Create sub-categories out of master dataset 
**************************************************************
**************************************************************

 * 1 Create aggregate "China and India" category
********************
use "chinainda_individual_HCT_long.dta", clear

* Every row is relabelled as belonging to the combined group.
gen Region = "China & India"

* For each country-specific column: order rows by year with non-missing values
* first (sort places missing last), then let carryforward copy the value into
* the following rows where it is missing, so each row holds both countries' figures.
* NOTE(review): carryforward is run without by(), so a year with no value at all
* would inherit the previous year's value -- confirm this cannot occur in the data.
foreach v in num_poorChina num_poorIndia popChina popIndia {
    sort year `v'
    carryforward `v', replace
}

drop country

* Combined totals for China and India.
gen num_poorCI = num_poorChina + num_poorIndia
gen popCI = popChina + popIndia

* Reduce to one row per year (first row of each year is kept).
keep Region year num_poorCI popCI
collapse (first) Region num_poorCI popCI, by(year)

* One wide row for the group; id = 1 is the key used later by merge m:1 id.
reshape wide num_poorCI popCI, i(Region) j(year)
gen id = 1
save "chinaindaAG_HCT.dta", replace
 

 * 2 Create EAP ex China 
********************
use "poverty_HCT.dta", clear

* Give every master row the key "China*" so the m:1 merge attaches the row of the
* individual-country file that carries that key (its China columns are used below).
gen country = "China*"
merge m:1 country using "chinainda_individual_HCT.dta", nogenerate

keep if Region == "East Asia and Pacific"

reshape long HCT_pct HCT_pct_CI population num_poor num_poorChina popChina, i(Region) j(Year)

* East Asia and Pacific excluding China: number of poor, population, poverty share.
tempvar poor_ex pop_ex share_ex
gen `poor_ex' = num_poor - num_poorChina
gen `pop_ex' = population - popChina
gen `share_ex' = `poor_ex' / `pop_ex'

* Relabel the region and overwrite the regional figures with the ex-China values.
* The share is multiplied by 100 to put it on the same scale as the other HCT_pct values.
replace Region = "EAP X China"
replace num_poor = `poor_ex'
replace population = `pop_ex'
replace HCT_pct = (`share_ex') * 100

keep Region Year HCT_pct num_poor population*

reshape wide HCT_pct num_poor population, i(Region) j(Year)

save "EAP_xChina_HCT.dta", replace


 * 3 Gen SA ex India
********************
use "poverty_HCT.dta", clear

* Give every master row the key "India*" so the m:1 merge attaches the row of the
* individual-country file that carries that key (its India columns are used below).
gen country = "India*"
merge m:1 country using "chinainda_individual_HCT.dta", nogenerate

keep if Region == "South Asia"

reshape long HCT_pct HCT_pct_CI population num_poor num_poorIndia popIndia, i(Region) j(Year)

* South Asia excluding India: number of poor, population, poverty share.
tempvar poor_ex pop_ex share_ex
gen `poor_ex' = num_poor - num_poorIndia
gen `pop_ex' = population - popIndia
gen `share_ex' = `poor_ex' / `pop_ex'

* Relabel the region and overwrite the regional figures with the ex-India values.
* The share is multiplied by 100 to put it on the same scale as the other HCT_pct values.
replace Region = "SA X India"
replace num_poor = `poor_ex'
replace population = `pop_ex'
replace HCT_pct = (`share_ex') * 100

keep Region Year HCT_pct num_poor population*

reshape wide HCT_pct num_poor population, i(Region) j(Year)

save "SA_xIndia_HCT.dta", replace


 * 4 Create Developing ex China & India 
********************
use "poverty_HCT.dta", clear

* Attach the combined China + India totals (num_poorCI*, popCI*) via the id key.
merge m:1 id using "chinaindaAG_HCT.dta", nogenerate

keep if Region == "Developing World"

reshape long HCT_pct population num_poor num_poorCI popCI, i(Region) j(Year)

* Developing world excluding China and India.
tempvar poor_ex pop_ex share_ex
* poor in all developing countries minus poor in China and India
gen `poor_ex' = num_poor - num_poorCI
* population of all developing countries minus population of China and India
gen `pop_ex' = population - popCI
* headcount share of poor in developing world ex. China and India
gen `share_ex' = `poor_ex' / `pop_ex'

* Relabel the region and overwrite the figures with the ex-China-and-India values.
* The share is multiplied by 100 to match the format of the other HCT_pct values.
replace Region = "Developing world X China & India"
replace num_poor = `poor_ex'
replace population = `pop_ex'
replace HCT_pct = (`share_ex') * 100

keep Region id Year HCT_pct num_poor population*

reshape wide HCT_pct num_poor population, i(Region id) j(Year)

save "dev_xChiInd_HCT.dta", replace


 * 5 Create Rest of World
********************
use "poverty_HCT.dta", clear

* Attach the combined China + India totals (num_poorCI*, popCI*) via the id key.
merge m:1 id using "chinaindaAG_HCT.dta", nogenerate

keep if Region == "Developing World" | Region == "Sub-Saharan Africa"

* For each survey year build China + India + SSA totals on the SSA row, then copy
* them onto the Developing World row: sort places the missing value last and
* carryforward fills it from the preceding (SSA) row.
local survey_years 1990 1993 1996 1999 2002 2005 2008 2010 2011 2012 2013
foreach yr of local survey_years {
    * each spec is: output stub, regional stub, China + India stub
    foreach spec in "num_poorCI_SSA num_poor num_poorCI" "popCI_SSA population popCI" {
        local out : word 1 of `spec'
        local reg : word 2 of `spec'
        local ci : word 3 of `spec'
        gen `out'`yr' = `reg'`yr' + `ci'`yr' if Region=="Sub-Saharan Africa"
        sort `out'`yr'
        carryforward `out'`yr', replace
    }
}

* The SSA row is no longer needed once its totals sit on the Developing World row.
keep if Region == "Developing World"

reshape long HCT_pct population num_poor num_poorCI_SSA popCI_SSA, i(Region) j(Year)

* Rest of World = developing world minus (China + India + SSA).
tempvar poor_row pop_row share_row
* poor in all developing countries minus poor in China + India + SSA
gen `poor_row' = num_poor - num_poorCI_SSA
* population of all developing countries minus population of China + India + SSA
gen `pop_row' = population - popCI_SSA
* headcount share of poor in Rest of World
gen `share_row' = `poor_row' / `pop_row'

* Relabel the region as Rest of World and overwrite the figures with its values.
* The share is multiplied by 100 to match the format of the other HCT_pct values.
replace Region = "Rest of World"
replace num_poor = `poor_row'
replace population = `pop_row'
replace HCT_pct = (`share_row') * 100

keep Region id Year HCT_pct num_poor population*

reshape wide HCT_pct num_poor population, i(Region id) j(Year)

save "ROW_HCT.dta", replace


********************
 * 6 Create master aggregate poverty dataset with all WB groups plus China, India, EAP ex. China and SA ex. India
***********************
* Stack the World Bank groups with the derived groups built above
* (append order is kept because it fixes the row order of the saved file).
use "poverty_HCT.dta", clear
foreach piece in "dev_xChiInd_HCT.dta" "chinainda_individual_HCT.dta" "SA_xIndia_HCT.dta" "EAP_xChina_HCT.dta" "ROW_HCT.dta" {
    append using "`piece'"
}

* Country-specific helper columns are not needed in the master file.
drop num_poorIndia* num_poorChina* popIndia* popChina* country

* For the China* and India* rows, take population from the population_all columns.
foreach yr of numlist 1990 1993 1996 1999 2002 2005 2008 2010 2011 2012 2013 {
    replace population`yr' = population_all`yr' if inlist(Region, "China*", "India*")
}

save "poverty_HCT_full.dta", replace



**************************************************************
**************************************************************
 * B * Rate of progress calculations
**************************************************************
**************************************************************
use "poverty_HCT_full.dta", clear

* Rescale every headcount column matched by HCT_pct* from percent to a 0-1 share.
foreach hc of varlist HCT_pct* {
    replace `hc' = `hc'/100
}

* Pre-MDG rates of progress: average annual decline in the headcount share
* from each start year up to 2002 (rp9002 over 12 years, rp9602 over 6 years).
local mdg_year 2002
foreach start in 1990 1996 {
    local tag = substr("`start'", 3, 2) + substr("`mdg_year'", 3, 2)
    gen rp`tag' = (HCT_pct`start' - HCT_pct`mdg_year') / (`mdg_year' - `start')
}

* Post-MDG rate of progress, 2002-2013 (11 years), all aggregates.
gen rp0213 = (HCT_pct2002 - HCT_pct2013) / (2013 - 2002)


**************************************************************
**************************************************************
 * C * Identify difference in rate of progress before/after MDGs
**************************************************************
**************************************************************
*1990-2002 v. 2002-2013 and 1996-2002 v. 2002-2013

* Compare each pre-MDG window (1990-2002, 1996-2002) with the post-MDG window (2002-2013).
foreach pre in 9002 9602 {
    foreach pos in 0213 {

        * post-period rate minus pre-period rate
        gen difrp`pre'_`pos' = rp`pos'-rp`pre'
        label var difrp`pre'_`pos' "dif in rate of progress, (`pos')-(`pre')"

        * 1 when both rates are available, 0 otherwise
        gen fulldata`pre'_`pos' = (rp`pre'!=. & rp`pos'!=.)
        label var fulldata`pre'_`pos' "# countries with data for all years `pre' `pos'"
    }
}

**************************************************************
**************************************************************
 * D * Use rate of progress to generate counterfactual for 2003-2015
**************************************************************
**************************************************************
*1990-2002  1996-2002
 
* Counterfactual headcount shares for 2003-2015: extend the 2002 share linearly
* using each pre-MDG rate of progress (rp9002 and rp9602).
* Fixes: the variable label previously began with an undefined local macro, which
* expanded to nothing and left a stray leading blank; the obsolete "for num"
* prefix is replaced by forvalues.
foreach pre in 9002 9602 {
    forvalues yr = 2003/2015 {
        * 2002 share minus (annual rate x years elapsed since 2002)
        gen cf`pre'_`yr' = HCT_pct2002 - (rp`pre' * (`yr' - 2002)) if rp`pre' != .
        * a poverty share cannot fall below 0
        replace cf`pre'_`yr' = 0 if cf`pre'_`yr' < 0 & cf`pre'_`yr' != .
        label variable cf`pre'_`yr' "`yr' extrapolation from `pre' rates"
    }
}

save "poverty_HCT_full_cfmdg_allcountries.dta", replace


**************************************************************
**************************************************************
 * E * Lives affected using counterfactuals
**************************************************************
**************************************************************

**use World Bank's included "num_poor" for calculations

* Counterfactual A: Additional lives affected based on 1990-2002 rates** 
*************************************
 **This indicator is extrapolating in the aggregate instead of country-level
 **Replace "Developing world" with China + India + SSA + Rest of World and "Developing ex. China and India" with SSA + Rest of World

* Counterfactual A, steps 1-3: lives affected relative to the 1990-2002 trajectory.
* Changes from the earlier version of this block:
*   - egen total() replaces the undocumented egen sum()
*   - the group total is written to every row directly with cond()/inlist(),
*     instead of sort + carryforward, so the row order of the data is no longer
*     changed (sort without the stable option orders ties arbitrarily)
*   - sub-step comments under step 3 are numbered 3a/3b
* Values are unchanged as long as at least one of the listed regions is present.
local lives_years 2005 2008 2010 2011 2012 2013

* 1 * calculate estimated lives affected per year for each category:
*     (counterfactual share / actual share - 1) x actual number of poor
foreach yr of local lives_years {
    gen xaff_cf9002_`yr' = ((cf9002_`yr' / HCT_pct`yr') - 1) * (num_poor`yr')
    label variable xaff_cf9002_`yr' "Counterfactual A: estimated lives affected in `yr' "
}

* 2 * Replace "Developing world" with the sum of lives affected in "China" + "India" + "SSA" + "Rest of World"
foreach yr of local lives_years {
    * 2a sum of lives affected over the four component rows, stored on every row
    egen adjusted_devA`yr' = total(cond(inlist(Region, "Rest of World", "China*", "India*", "Sub-Saharan Africa"), xaff_cf9002_`yr', .))
    * 2b replace lives affected in developing world with adjusted total
    replace xaff_cf9002_`yr' = adjusted_devA`yr' if Region=="Developing World"
}

* 3 * Replace "Developing ex. China and India" with the sum of lives affected in "SSA" + "Rest of World"
foreach yr of local lives_years {
    * 3a sum of lives affected over the two component rows, stored on every row
    egen adjusted_devxCIA`yr' = total(cond(inlist(Region, "Rest of World", "Sub-Saharan Africa"), xaff_cf9002_`yr', .))
    * 3b replace lives affected in developing world ex. China and India with adjusted total
    replace xaff_cf9002_`yr' = adjusted_devxCIA`yr' if Region=="Developing world X China & India"
}

* Counterfactual A, steps 4-6: people in poverty in 2013 under the 1990-2002 trajectory.
* Group totals use egen total() (the documented name of the older sum()) and are
* written to every row directly with cond()/inlist(), so the data are no longer
* re-sorted just to spread the total across rows.

* 4 * Calculate total people that would have been in poverty if 1990-2002 trajectory had continued (use for Figure 4.20)
gen num_poor_cf9002_2013 = cf9002_2013 * population2013
label variable num_poor_cf9002_2013 "Counterfactual A: estimated total people in poverty in 2013"

* 5 * Replace "developing world" number of poor with the sum of number of poor in "China" + "India" + "SSA" + "Rest of World" (use for Figure 4.20)
egen adjusted_dev9002 = total(cond(inlist(Region, "Rest of World", "China*", "India*", "Sub-Saharan Africa"), num_poor_cf9002_2013, .))
replace num_poor_cf9002_2013 = adjusted_dev9002 if Region=="Developing World"

* 6 * Replace "developing world ex China and India" number of poor with the sum of number of poor in "SSA" + "Rest of World"
egen adjusted_devxCI9002 = total(cond(inlist(Region, "Rest of World", "Sub-Saharan Africa"), num_poor_cf9002_2013, .))
replace num_poor_cf9002_2013 = adjusted_devxCI9002 if Region=="Developing world X China & India"


* Counterfactual B: Additional lives affected based on 1996-2002 rates** 
*************************************

* Counterfactual B, steps 1-3: lives affected relative to the 1996-2002 trajectory.
* Group totals use egen total() (the documented name of the older sum()) and are
* written to every row directly with cond()/inlist(), instead of sort + carryforward,
* so the row order of the saved file is no longer changed by these steps.
local lives_years 2005 2008 2010 2011 2012 2013

* 1 * calculate estimated lives affected per year for each category:
*     (counterfactual share / actual share - 1) x actual number of poor
foreach yr of local lives_years {
    gen xaff_cf9602_`yr' = ((cf9602_`yr' / HCT_pct`yr') - 1) * (num_poor`yr')
    label variable xaff_cf9602_`yr' "Counterfactual B: estimated lives affected in `yr', if scaled by CF/Actual"
}

* 2 * Replace "Developing world" with the sum of lives affected in "China" + "India" + "SSA" + "Rest of World"
foreach yr of local lives_years {
    * 2a sum of lives affected over the four component rows, stored on every row
    egen adjusted_devb`yr' = total(cond(inlist(Region, "Rest of World", "China*", "India*", "Sub-Saharan Africa"), xaff_cf9602_`yr', .))
    * 2b replace lives affected in developing world with adjusted total
    replace xaff_cf9602_`yr' = adjusted_devb`yr' if Region=="Developing World"
}

* 3 * Replace "Developing world ex China and India" with the sum of lives affected in "SSA" + "Rest of World"
foreach yr of local lives_years {
    * 3a sum of lives affected over the two component rows, stored on every row
    egen adjusted_devxCIb`yr' = total(cond(inlist(Region, "Rest of World", "Sub-Saharan Africa"), xaff_cf9602_`yr', .))
    * 3b replace lives affected in developing world ex China and India with adjusted total
    replace xaff_cf9602_`yr' = adjusted_devxCIb`yr' if Region=="Developing world X China & India"
}

save "poverty_HCT_full_analysis.dta", replace


**************************************************************
**************************************************************
 * F * Reshape for Figures
**************************************************************
**************************************************************

** Figure 4.19 Extreme income poverty trajectories in developing countries
use "poverty_HCT_full_analysis.dta", clear
sort Region

* The headcount data are not annual: add empty columns for the years up to 2002
* that have no observation, so the long file has a row for each of those years.
foreach gap in 1991 1992 1994 1995 1997 1998 2000 2001 {
    gen HCT_pct`gap' = .
}

* One row per Region-Year holding the actual share and both counterfactual shares.
reshape long cf9002_ cf9602_ HCT_pct, i(Region) j(Year)
keep Region Year HCT_pct cf9002_ cf9602_
save "poverty_HCT_linegraph.dta", replace


** Figure 4.20 People living in extreme income poverty actual vs. pre-MDG trajectory
use "poverty_HCT_full_analysis.dta", clear

* Keep the four component groups and the three columns listed below.
* NOTE(review): no save or graph command follows in this part of the file, so the
* result appears to be left in memory -- confirm how it is exported.
keep if inlist(Region, "Rest of World", "Sub-Saharan Africa", "China*", "India*")
keep Region num_poor2002 num_poor2013 num_poor_cf9002_2013
