* Start with no dataset in memory.
clear
* Turn off -more- paging so the do-file runs through without pausing.
set more off
* NOTE(review): no directory is given on the cd line below. Presumably the user
* is expected to fill in the folder holding the *_clean.dta input files (all
* later use/save commands rely on relative file names) - confirm before running.
cd 

****************************************************************************
****************************************************************************
**  3.  INDICATORS WITH 50 PERCENT TARGETS
** water * sanitation * income poverty * undernourishment

** John W. McArthur & Krista Rasmussen
** January 2018
****************************************************************************
****************************************************************************

****************************************************************************
***WATER * SANITATION * INCOME POVERTY
****************************************************************************
*I. Rate of progress and counterfactual calculations
	*A. Rate of progress (percentage point), pre and post-MDG
	*B. Difference in rate of progress
	*C. Extrapolate pre-MDG rate of progress to find counterfactual
*II. Country count acceleration test and lives improved
	*A. Acceleration test - country count
	*B. Lives improved compared to BAU trajectory
****************************************************************************

***************************************************
**I. RATE OF PROGRESS AND COUNTERFACTUAL CALCULATIONS
***************************************************
* Income poverty: convert every yearly value to its complement (1 - value) so the
* indicator reads as "% not in poverty". This matches water and sanitation access,
* where an increase in the value is an improvement.
* The flipped data are written to a separate file (note the double underscore).
use "inc_poverty_clean", clear
unab povyears : n1990-n2015
foreach yearvar of local povyears {
	replace `yearvar' = 1 - `yearvar'
}
save "inc_poverty__clean.dta", replace

**************************************
* A* Calculate Rate of Progress: average annual percentage point change 
**************************************
** due to variation in initial and final year of data for each country, no standard initial or final year established for all countries
 * instead, use first available data point up to 1995 for initial, and most recent available from 2010 to 2015 for final
* File stems of the indicators processed in this section; each is read from
* <stem>_clean and written to <stem>_rpfull.
local indicators "improvedwater_pct improvedsanitation_pct inc_poverty_"

foreach p of local indicators {
	use "`p'_clean", clear

	*--------------------------------------------------------------
	* Pre-MDG rate of progress (variable rppre):
	* average annual percentage point change up to ~2000
	*--------------------------------------------------------------
	* Preferred window is 1990-2000. Where that is missing, try in this order:
	*   end year 2000 with start year 1991, 1992, ..., 1995
	*   end year 2001 with start year 1990, ..., 1995
	*   end year 1999 with start year 1990, ..., 1995
	* Each replace only touches observations that are still missing, so the
	* first window that has data wins.
	gen rppre = (n2000 - n1990)/10
	forvalues startyr = 1991/1995 {
		local span = 2000 - `startyr'
		replace rppre = (n2000 - n`startyr')/`span' if rppre==.
	}
	forvalues startyr = 1990/1995 {
		local span = 2001 - `startyr'
		replace rppre = (n2001 - n`startyr')/`span' if rppre==.
	}
	forvalues startyr = 1990/1995 {
		local span = 1999 - `startyr'
		replace rppre = (n1999 - n`startyr')/`span' if rppre==.
	}

	* Keep an all-country copy, then blank rppre for countries already at >=99%
	* in the base year (2000, else 2001, else 1999). Those countries were not the
	* target of the MDG and cannot meaningfully accelerate.
	gen all_rppre = rppre
	replace rppre = . if (n2000>=.99 & n2000!=.) ///
		| (n2001>=.99 & n2001!=. & n2000==.) ///
		| (n1999>=.99 & n1999!=. & n2000==. & n2001==.)

	*--------------------------------------------------------------
	* Post-MDG rate of progress (variable rppost):
	* average annual percentage point change from ~2000 onward
	*--------------------------------------------------------------
	* Preferred window is 2000-2015. Where that is missing, try in this order:
	*   start year 2000 with end year 2014, 2013, ..., 2010
	*   start year 2001 with end year 2015, ..., 2010
	*   start year 1999 with end year 2015, ..., 2010
	gen rppost = (n2015 - n2000)/15
	forvalues endyr = 2014(-1)2010 {
		local span = `endyr' - 2000
		replace rppost = (n`endyr' - n2000)/`span' if rppost==.
	}
	forvalues endyr = 2015(-1)2010 {
		local span = `endyr' - 2001
		replace rppost = (n`endyr' - n2001)/`span' if rppost==.
	}
	forvalues endyr = 2015(-1)2010 {
		local span = `endyr' - 1999
		replace rppost = (n`endyr' - n1999)/`span' if rppost==.
	}

	* Countries that reach 100% before 2015: measure progress only up to the
	* first year at 100%, not up to 2015.
	* Step 1: fulln2015 = most recent value, looking back from 2015 to 2010.
	*         The condition grows by one year per pass: a year is used only when
	*         every later year is missing.
	gen fulln2015 = n2015
	local latermiss "n2015==."
	forvalues yr = 2014(-1)2010 {
		replace fulln2015 = n`yr' if `latermiss'
		local latermiss "`latermiss' & n`yr'==."
	}

	* Step 2a: base year 2000. Years are visited from 2014 down to 2002, so the
	*          EARLIEST year equal to 1 is the one whose rate is kept.
	*          (A value equal to 1 is necessarily non-missing.)
	forvalues hityr = 2014(-1)2002 {
		local span = `hityr' - 2000
		replace rppost = (n`hityr' - n2000)/`span' if fulln2015==1 & n`hityr'==1
	}

	* Step 2b: same, measured from 2001 when 2000 is missing
	forvalues hityr = 2014(-1)2002 {
		local span = `hityr' - 2001
		replace rppost = (n`hityr' - n2001)/`span' if n2000==. & fulln2015==1 & n`hityr'==1
	}

	* Step 2c: same, measured from 1999 when 2000 and 2001 are both missing
	forvalues hityr = 2014(-1)2002 {
		local span = `hityr' - 1999
		replace rppost = (n`hityr' - n1999)/`span' if n2000==. & n2001==. & fulln2015==1 & n`hityr'==1
	}

	* Keep an all-country copy, then apply the same >=99% base-year exclusion
	* that was applied to rppre.
	gen all_rppost = rppost
	replace rppost = . if (n2000>=.99 & n2000!=.) ///
		| (n2001>=.99 & n2001!=. & n2000==.) ///
		| (n1999>=.99 & n1999!=. & n2000==. & n2001==.)

	*--------------------------------------------------------------
	* B. Difference in rate of progress, post minus pre
	*--------------------------------------------------------------
	gen difrppre_post = rppost - rppre
	label var difrppre_post "dif in rate of progress, (post)-(pre)"

	* 1 when both rates are available, 0 otherwise
	gen fulldatapre_post = (rppre!=. & rppost!=.)
	label var fulldatapre_post "countries with full data for pre and post"

	*--------------------------------------------------------------
	* C. Counterfactual for 2001-2015: extend the pre-MDG rate forward
	*--------------------------------------------------------------
	* Starts from the 2000 value, else the 2001 value, else the 1999 value.
	* Capped at 1 (100%); no lower bound is applied.
	forvalues yr = 2001/2015 {
		gen cfpre_`yr' = n2000 + (rppre*(`yr' - 2000)) if rppre!=.
		replace cfpre_`yr' = n2001 + (rppre*(`yr' - 2001)) if rppre!=. & n2000==.
		replace cfpre_`yr' = n1999 + (rppre*(`yr' - 1999)) if rppre!=. & n2000==. & n2001==.
		replace cfpre_`yr' = 1 if cfpre_`yr' > 1 & cfpre_`yr'!=.
		label variable cfpre_`yr' "`p' `yr' extrapolation from pre rates"
	}

	save "`p'_rpfull", replace
}


***************************************************
**II. COUNTRY COUNT ACCELERATION TEST AND LIVES IMPROVED 
***************************************************

* File prefixes for this section: input is <prefix>rpfull.dta, outputs are
* <prefix>cfmdg and <prefix>_analysis.
local stems "improvedwater_pct_ improvedsanitation_pct_ inc_poverty__"
foreach p of local stems {

	use "`p'rpfull.dta", clear

	*------------------------------------------------------------------
	* A. Acceleration test - simple country count
	*------------------------------------------------------------------
	* difrppre_post is the post rate minus the pre rate.
	* (i) Any acceleration: 1 when the difference is positive, 0 otherwise,
	*     missing when the difference is missing. Countries whose post rate is
	*     zero or negative (access still falling, even if more slowly) are set
	*     to 0.
	gen difrppre_post_any = (difrppre_post > 0 & !(rppost <= 0)) if difrppre_post != .
	label variable difrppre_post_any "rate of progress in 0015 greater than 9000"

	* (ii) Real acceleration: the difference is at least .0033, i.e. .33
	*      percentage points per year. Same exclusions as in (i).
	gen difrppre_post_1 = (difrppre_post >= .0033 & !(rppost <= 0)) if difrppre_post != .
	label variable difrppre_post_1 "rate of progress in 0015 >=.33 pct point greater than 9000"

	save "`p'cfmdg", replace

	*------------------------------------------------------------------
	* B. Lives improved relative to the business-as-usual counterfactual
	*------------------------------------------------------------------
	* Counterfactual A: people with access had the pre (~1990-2000) rate continued
	forvalues yr = 2001/2015 {
		gen xaccess_cfpre_`yr' = cfpre_`yr' * pop`yr'
		label variable xaccess_cfpre_`yr' "Counterfactual A: estimated `yr' people with access"
	}

	* Counterfactual A: actual people with access minus counterfactual people with access
	forvalues yr = 2001/2015 {
		gen xaff_cfpre_`yr' = (n`yr' * pop`yr') - xaccess_cfpre_`yr'
		label variable xaff_cfpre_`yr' "Counterfactual A: estimated lives affected in `yr', extrapolated from pre-MDG (~1990-2000) rates"
	}

	* Most recent available value, looking back from 2015 to 2010. A year is
	* used only when every later year is missing.
	gen xaff_cfpre_full = xaff_cfpre_2015
	local latermiss "xaff_cfpre_2015==."
	forvalues yr = 2014(-1)2010 {
		replace xaff_cfpre_full = xaff_cfpre_`yr' if `latermiss'
		local latermiss "`latermiss' & xaff_cfpre_`yr'==."
	}

	* ---- Totals by country group ----

	* All countries in the file
	forvalues yr = 2001/2015 {
		egen dev_cfA_lives_`yr' = sum(xaff_cfpre_`yr')
		label variable dev_cfA_lives_`yr' "Counterfactual A: developing world total lives affected in `yr'"
	}
	egen dev_cfA_lives_full = sum(xaff_cfpre_full)
	label variable dev_cfA_lives_full "Counterfactual A: developing world total lives affected as of most recent year"
	* Number of countries that contribute a value to the total above
	gen count=1
	egen dev_cfA_count_full = sum(count) if xaff_cfpre_full!=.

	* By region
	forvalues yr = 2001/2015 {
		egen region_cfA_lives_`yr' = sum(xaff_cfpre_`yr'), by(Region)
		label variable region_cfA_lives_`yr' "Counterfactual A: by region, total lives affected in `yr'"
	}
	egen region_cfA_lives_full = sum(xaff_cfpre_full), by(Region)
	label variable region_cfA_lives_full "Counterfactual A: by region, total lives affected in final year"

	* Low income countries
	forvalues yr = 2001/2015 {
		egen LIC_cfA_lives_`yr' = sum(xaff_cfpre_`yr') if IncomeGroup=="L"
		label variable LIC_cfA_lives_`yr' "Counterfactual A: LIC total lives affected in `yr'"
	}
	egen LIC_cfA_lives_full = sum(xaff_cfpre_full) if IncomeGroup=="L"
	label variable LIC_cfA_lives_full "Counterfactual A: LIC total lives affected in final year"

	* Low income countries excluding India
	forvalues yr = 2001/2015 {
		egen LICxind_cfA_lives_`yr' = sum(xaff_cfpre_`yr') if IncomeGroup=="L" & CountryCode!="IND"
		label variable LICxind_cfA_lives_`yr' "Counterfactual A: LIC ex-India total lives affected in `yr'"
	}
	egen LICxind_cfA_lives_full = sum(xaff_cfpre_full) if IncomeGroup=="L" & CountryCode!="IND"
	label variable LICxind_cfA_lives_full "Counterfactual A: LIC ex-India total lives affected in final year"

	* Middle income countries, defined here as every observation whose IncomeGroup is not L
	forvalues yr = 2001/2015 {
		egen MIC_cfA_lives_`yr' = sum(xaff_cfpre_`yr') if IncomeGroup!="L"
		label variable MIC_cfA_lives_`yr' "Counterfactual A: MIC total lives affected in `yr'"
	}
	egen MIC_cfA_lives_full = sum(xaff_cfpre_full) if IncomeGroup!="L"
	label variable MIC_cfA_lives_full "Counterfactual A: MIC total lives affected in final year"

	* Middle income countries excluding China
	forvalues yr = 2001/2015 {
		egen MICxchn_cfA_lives_`yr' = sum(xaff_cfpre_`yr') if IncomeGroup!="L" & CountryCode!="CHN"
		label variable MICxchn_cfA_lives_`yr' "Counterfactual A: MIC ex-China total lives affected in `yr'"
	}
	egen MICxchn_cfA_lives_full = sum(xaff_cfpre_full) if IncomeGroup!="L" & CountryCode!="CHN"
	label variable MICxchn_cfA_lives_full "Counterfactual A: MIC ex-China total lives affected in final year"

	* Everything excluding China and India
	forvalues yr = 2001/2015 {
		egen xChiInd_cfA_lives_`yr' = sum(xaff_cfpre_`yr') if CountryCode!="CHN" & CountryCode!="IND"
		label variable xChiInd_cfA_lives_`yr' "Counterfactual A: ex-China & India total lives affected in `yr'"
	}
	egen xChiInd_cfA_lives_full = sum(xaff_cfpre_full) if CountryCode!="CHN" & CountryCode!="IND"
	label variable xChiInd_cfA_lives_full "Counterfactual A: ex-China & India total lives affected in final year"

	* China alone (yearly values are a straight copy, not a sum)
	forvalues yr = 2001/2015 {
		gen China_cfA_lives_`yr' = xaff_cfpre_`yr' if CountryCode=="CHN"
		label variable China_cfA_lives_`yr' "Counterfactual A: China total lives affected in `yr'"
	}
	egen China_cfA_lives_full = sum(xaff_cfpre_full) if CountryCode=="CHN"
	label variable China_cfA_lives_full "Counterfactual A: China total lives affected in final year"

	* India alone (yearly values are a straight copy, not a sum)
	forvalues yr = 2001/2015 {
		gen India_cfA_lives_`yr' = xaff_cfpre_`yr' if CountryCode=="IND"
		label variable India_cfA_lives_`yr' "Counterfactual A: India total lives affected in `yr'"
	}
	egen India_cfA_lives_full = sum(xaff_cfpre_full) if CountryCode=="IND"
	label variable India_cfA_lives_full "Counterfactual A: India total lives affected in final year"

	* Rest of world: excludes India, China and the SSA region
	forvalues yr = 2001/2015 {
		egen ROW_cfA_lives_`yr' = sum(xaff_cfpre_`yr') if CountryCode!="IND" & CountryCode!="CHN" & Region!="SSA"
		label variable ROW_cfA_lives_`yr' "Counterfactual A: Rest of world total lives affected in `yr'"
	}
	egen ROW_cfA_lives_full = sum(xaff_cfpre_full) if CountryCode!="IND" & CountryCode!="CHN" & Region!="SSA"
	label variable ROW_cfA_lives_full "Counterfactual A: Rest of world total lives affected in final year"

	save "`p'_analysis", replace

}



****************************************************************************
***UNDERNOURISHMENT * UNDERNOURISHMENT AGGREGATE
****************************************************************************
*I. Create "rest of world" category
*II. Rate of progress and counterfactual calculations
	*A. Rate of progress (percentage point), pre and post-MDG
	*B. Difference in rate of progress
	*C. Extrapolate pre-MDG rate of progress to find counterfactual
*III. Country count acceleration test and lives improved
	*A. Acceleration test - country count
	*B. Lives improved compared to BAU trajectory
****************************************************************************

***************************************************
**I. CREATE "REST OF WORLD" CATEGORY
***************************************************

***Due to low country coverage for undernourishment, WB aggregates are used when calculating lives improved.
***This step builds a "Rest of World" (ROW) row: the LMY aggregate minus China, India and SSA.
* Inputs : undernourishment_AG_clean.dta (aggregates), undernourishment_clean.dta (country level)
* Outputs: undernourishment_AG_clean_LMY.dta, undernourishment_AG_clean_ROW.dta (intermediate)
*          undernourishment_AG__clean.dta (aggregates plus ROW, without LMY)
* Requires the user-written command -carryforward- (not part of official Stata).
use "undernourishment_AG_clean.dta", clear
append using "undernourishment_clean.dta"
keep if CountryCode=="LMY" | CountryCode=="CHN" | CountryCode=="IND" | CountryCode=="SSA"

* 1) Use pop where regpop is missing, then compute the number of people
*    undernourished (% undernourished * population)
forvalues yr = 1990/2015 {
	replace regpop`yr' = pop`yr' if regpop`yr'==.
	gen num_undnour`yr' = n`yr' * regpop`yr'
}

forvalues yr = 1990/2015 {
	* 2) Total undernourished in China + India + SSA. The sum is only filled on
	*    those three rows; sorting places the missing (LMY) row last so that
	*    carryforward copies the total onto it.
	egen ChiIndSSA_undnour`yr' = sum(num_undnour`yr') if CountryCode=="CHN" | CountryCode=="IND" | CountryCode=="SSA"
	sort ChiIndSSA_undnour`yr'
	carryforward ChiIndSSA_undnour`yr', replace

	* 3) Total population in China + India + SSA, copied to the LMY row the same way
	egen ChiIndSSA_pop`yr' = sum(regpop`yr') if CountryCode=="CHN" | CountryCode=="IND" | CountryCode=="SSA"
	sort ChiIndSSA_pop`yr'
	carryforward ChiIndSSA_pop`yr', replace

	* 4) Undernourished in ROW = row total minus the China/India/SSA total
	*    (only meaningful on the LMY row, which is the only row kept below)
	gen xChiIndSSA_undnour`yr' = num_undnour`yr' - ChiIndSSA_undnour`yr'
	label var xChiIndSSA_undnour`yr' "number of undernourished in Rest of World (ex. China India SSA)"

	* 5) Population in ROW
	gen xChiIndSSA_pop`yr' = regpop`yr' - ChiIndSSA_pop`yr'

	* 6) Percent undernourished in ROW
	gen pct_undnourROW`yr' = xChiIndSSA_undnour`yr' / xChiIndSSA_pop`yr'
}

* 7) Turn the LMY row into the ROW row
keep if CountryCode=="LMY"
save "undernourishment_AG_clean_LMY.dta", replace

* FIX: -use- accepts the options clear and nolabel only. The original passed
* "replace", which -use- rejects, so the do-file stopped at this line.
use "undernourishment_AG_clean_LMY.dta", clear
forvalues yr = 1990/2015 {
	replace n`yr' = pct_undnourROW`yr'
	replace regpop`yr' = xChiIndSSA_pop`yr'
}
* n* also matches the num_undnour* helpers, which are dropped right after
keep CountryName CountryCode IndicatorName n* regpop*
drop num*
replace CountryName = "Rest of World, ex China India SSA"
replace CountryCode = "ROW"
save "undernourishment_AG_clean_ROW.dta", replace

* 8) Append ROW to the other undernourishment aggregates.
*    clear is added so this load does not depend on the data in memory being unchanged.
use "undernourishment_AG_clean.dta", clear
append using "undernourishment_AG_clean_ROW.dta"
replace Region = "ROW" if CountryName=="Rest of World, ex China India SSA"
* NOTE(review): presumably the LMY aggregate row carries Region "LMY" in the input file - confirm
drop if Region == "LMY"

save "undernourishment_AG__clean.dta", replace


***************************************************
**II. RATE OF PROGRESS AND COUNTERFACTUAL CALCULATIONS
***************************************************
* File stems for this section: country-level data and the adjusted aggregates.
* Input is <stem>_clean, output is <stem>_rpfull.
local stems "undernourishment undernourishment_AG_"
foreach p of local stems {
	use "`p'_clean", clear

	* Convert % undernourished to % nourished (1 - value) so that, as with water
	* and sanitation access, an increase in the value is an improvement.
	unab nyears : n1990-n2015
	foreach yearvar of local nyears {
		replace `yearvar' = 1 - `yearvar'
	}

	*--------------------------------------------------------------
	* A. Rate of progress: average annual percentage point change
	*--------------------------------------------------------------
	* Pre-MDG windows: 1991-2000 and 1996-2001
	gen rp9100 = (n2000 - n1991)/9
	gen rp9601 = (n2001 - n1996)/5

	* Post-MDG windows: 2000-2015 and 2001-2015
	gen rp0015 = (n2015 - n2000)/15
	gen rp0115 = (n2015 - n2001)/14

	* The data max out at 95% nourished (WDI records this as .94999).
	* For countries at or above .949 in 2015, measure progress only up to the
	* first year at or above .949. Years are visited from latest to earliest, so
	* the EARLIEST qualifying year is the one whose rate is kept.
	forvalues hityr = 2014(-1)2001 {
		local span = `hityr' - 2000
		replace rp0015 = (n`hityr' - n2000)/`span' if n2015>=.949 & n2015!=. & n`hityr'>=.949 & n`hityr'!=.
	}
	forvalues hityr = 2014(-1)2002 {
		local span = `hityr' - 2001
		replace rp0115 = (n`hityr' - n2001)/`span' if n2015>=.949 & n2015!=. & n`hityr'>=.949 & n`hityr'!=.
	}

	* Blank the rates for countries at >=94% nourished in 2000 (6% undernourished
	* threshold). All-country copies are kept for rp9100 and rp0115 only.
	* Stata treats missing as larger than any number, so the condition below is
	* also true when n2000 is missing.
	* NOTE(review): the water/sanitation section guards its exclusion with
	* n2000!=. but this one does not - confirm that is intended.
	gen all_rp9100 = rp9100
	foreach ratevar in rp9100 rp9601 rp0015 {
		replace `ratevar' = . if n2000>= .94
	}
	gen all_rp0115 = rp0115
	replace rp0115 = . if n2000>= .94

	*--------------------------------------------------------------
	* B. Difference in rate of progress, post minus pre
	*--------------------------------------------------------------
	* Every pre/post pairing is generated; the two mismatched pairings
	* (9100 with 0115, 9601 with 0015) are dropped afterwards, leaving
	* 1991-2000 v. 2000-2015 and 1996-2001 v. 2001-2015.
	local prewindows "9100 9601"
	local postwindows "0015 0115"

	foreach before of local prewindows {
		foreach after of local postwindows {
			gen difrp`before'_`after' = rp`after' - rp`before'
			label var difrp`before'_`after' "dif in rate of progress, (`after')-(`before')"

			* 1 when both rates are available, 0 otherwise
			gen fulldata`before'_`after' = (rp`before'!=. & rp`after'!=.)
			label var fulldata`before'_`after' "countries with data for `before' and `after'"
		}
	}
	drop *9100_0115 *9601_0015

	*--------------------------------------------------------------
	* C. Counterfactuals: extend the pre-MDG rates forward
	*--------------------------------------------------------------
	* From the 1991-2000 rate, starting at the 2000 value, for 2001-2015.
	* Capped at .95 because the data do not go below 5% undernourished.
	forvalues yr = 2001/2015 {
		gen cf9100_`yr' = n2000 + (rp9100*(`yr' - 2000)) if rp9100!=.
		replace cf9100_`yr' = .95 if cf9100_`yr' > .95 & cf9100_`yr'!=.
		label variable cf9100_`yr' "`p' `yr' extrapolation from 9100 rates"
	}

	* From the 1996-2001 rate, starting at the 2001 value, for 2002-2015.
	* Same .95 cap.
	forvalues yr = 2002/2015 {
		gen cf9601_`yr' = n2001 + (rp9601*(`yr' - 2001)) if rp9601!=.
		replace cf9601_`yr' = .95 if cf9601_`yr' > .95 & cf9601_`yr'!=.
		label variable cf9601_`yr' "`p' `yr' extrapolation from 9601 rates"
	}

	save "`p'_rpfull", replace
}

***************************************************
**III. COUNTRY COUNT ACCELERATION TEST AND LIVES IMPROVED 
***************************************************
* File prefixes for this section: input is <prefix>rpfull, output is <prefix>cfmdg.
local stems "undernourishment_ undernourishment_AG__"
foreach p of local stems {

	use "`p'rpfull", clear

	*--------------------------------------------------------------
	* A. Acceleration test - simple country count
	*--------------------------------------------------------------
	* difrp9100_0015 is the 2000-2015 rate minus the 1991-2000 rate.
	* Any acceleration: 1 when the difference is positive, 0 otherwise, missing
	* when the difference is missing. Countries whose 2000-2015 rate is zero or
	* negative (hunger still rising, even if more slowly) are set to 0.
	gen difrp9100_0015_any = (difrp9100_0015 > 0 & !(rp0015 <= 0)) if difrp9100_0015 != .
	label variable difrp9100_0015_any "rate of progress in 0015 greater than 9100"

	* Real acceleration: the difference is at least .0033, i.e. .33 percentage
	* points per year. Same exclusions as above.
	gen difrp9100_0015_1 = (difrp9100_0015 >= .0033 & !(rp0015 <= 0)) if difrp9100_0015 != .
	label variable difrp9100_0015_1 "rate of progress in 0015 .33 pct point greater than 9100"

	save "`p'cfmdg", replace
}


***************************************************
* B * Use counterfactuals to determine number of lives improved compared to BAU
***************************************************
** Country coverage is incomplete, so group totals (developing, SSA) come from
** the World Bank aggregate file further below.
** China and India are taken from this country-level file.
use "undernourishment_cfmdg.dta", clear

* ---- Counterfactual A (1991-2000 rates), years 2001-2015 ----
* People nourished under the counterfactual
forvalues yr = 2001/2015 {
	gen xnourished_cf9100_`yr' = cf9100_`yr' * pop`yr'
	label variable xnourished_cf9100_`yr' "estimated people nourished in `yr' "
}

* Lives affected = actual people nourished minus counterfactual people nourished
forvalues yr = 2001/2015 {
	gen xaff_cf9100_`yr' = (n`yr' * pop`yr') - xnourished_cf9100_`yr'
	label variable xaff_cf9100_`yr' "Counterfactual A: estimated lives affected in `yr', extrapolated from 1991-2000 rates"
}

* China
forvalues yr = 2001/2015 {
	gen China_cfA_lives_`yr' = xaff_cf9100_`yr' if CountryCode=="CHN"
	label variable China_cfA_lives_`yr' "Counterfactual A: China total lives affected in `yr'"
}

* India
forvalues yr = 2001/2015 {
	gen India_cfA_lives_`yr' = xaff_cf9100_`yr' if CountryCode=="IND"
	label variable India_cfA_lives_`yr' "Counterfactual A: India total lives affected in `yr'"
}

* ---- Counterfactual B (1996-2001 rates), years 2002-2015 ----
* People nourished under the counterfactual
forvalues yr = 2002/2015 {
	gen xnourished_cf9601_`yr' = cf9601_`yr' * pop`yr'
	label variable xnourished_cf9601_`yr' "estimated people nourished in `yr' "
}

* Lives affected = actual people nourished minus counterfactual people nourished
forvalues yr = 2002/2015 {
	gen xaff_cf9601_`yr' = (n`yr' * pop`yr') - xnourished_cf9601_`yr'
	label variable xaff_cf9601_`yr' "Counterfactual B: estimated lives affected in `yr', extrapolated from 1996-2001 rates"
}

* China
forvalues yr = 2002/2015 {
	gen China_cfB_lives_`yr' = xaff_cf9601_`yr' if CountryCode=="CHN"
	label variable China_cfB_lives_`yr' "Counterfactual B: China total lives affected in `yr'"
}

* India
forvalues yr = 2002/2015 {
	gen India_cfB_lives_`yr' = xaff_cf9601_`yr' if CountryCode=="IND"
	label variable India_cfB_lives_`yr' "Counterfactual B: India total lives affected in `yr'"
}

save "undernourishment__analysis", replace


******* Aggregate counterfactuals from the WB aggregate file (country-level data are incomplete).
******* Same calculations as the country-level step, but weighted by regpop instead of pop.
use "undernourishment_AG__cfmdg", clear

* ---- Counterfactual A (1991-2000 rates), years 2001-2015 ----
* People nourished under the counterfactual
forvalues yr = 2001/2015 {
	gen xnourished_cf9100_`yr' = (cf9100_`yr' * regpop`yr')
	label variable xnourished_cf9100_`yr' "Counterfactual A: estimated `yr' people nourished"
}

* Lives affected = actual people nourished minus counterfactual people nourished
forvalues yr = 2001/2015 {
	gen xaff_cf9100_`yr' = (n`yr' * regpop`yr') - xnourished_cf9100_`yr'
	label variable xaff_cf9100_`yr' "Counterfactual A: estimated lives affected in `yr', extrapolated from 1991-2000 rates"
}
* The 2015 variable is relabelled as the final-year figure
label variable xaff_cf9100_2015 "Counterfactual A: estimated lives affected in final year, extrapolated from 1991-2000)"

* ---- Counterfactual B (1996-2001 rates), years 2002-2015 ----
* People nourished under the counterfactual
forvalues yr = 2002/2015 {
	gen xnourished_cf9601_`yr' = (cf9601_`yr' * regpop`yr')
	label variable xnourished_cf9601_`yr' "Counterfactual B: estimated `yr' people nourished"
}

* Lives affected = actual people nourished minus counterfactual people nourished
forvalues yr = 2002/2015 {
	gen xaff_cf9601_`yr' = (n`yr' * regpop`yr') - xnourished_cf9601_`yr'
	label variable xaff_cf9601_`yr' "Counterfactual B: estimated lives affected in `yr', extrapolated from 1996-2001 rates"
}
* The 2015 variable is relabelled as the final-year figure
label variable xaff_cf9601_2015 "Counterfactual B: estimated lives affected in final year, extrapolated from 1996-2001"

save "undernourishment_AG_preanalysis.dta", replace

* To match the country-level lives saved/improved calculations:
*   "All developing"                 = Rest of World + China + India + SSA
*   "Developing ex. China and India" = Rest of World + SSA
* This step expects the aggregate data saved as undernourishment_AG_preanalysis.dta
* to still be in memory; the country-level results (which supply the China and
* India rows) are appended to it.
* Requires the user-written command -carryforward- (not part of official Stata).
*
* Pattern used for every total below: egen fills the sum only on the rows that
* belong to the group, sort places the rows with a missing total last, and
* carryforward then copies the total onto those rows.
*
* FIX: the labels previously contained a literal "X" (copied from -for num-
* syntax, which these loops do not use), so every year carried the same label.
* They now show the year. The ex-China/India Counterfactual B label also wrongly
* read "all developing".
*****************
append using "undernourishment__analysis.dta"

** 1. Lives affected in rest of world + SSA + India + China ("all developing")
* Counterfactual A: based on 1991-2000 rates
forvalues yr = 2001/2015 {
	egen all_devxaff_cf9100_`yr' = sum(xaff_cf9100_`yr') if CountryCode == "ROW" | CountryCode == "CHN" | CountryCode=="IND" | CountryCode=="SSA"
	sort all_devxaff_cf9100_`yr'
	carryforward all_devxaff_cf9100_`yr', replace
	label variable all_devxaff_cf9100_`yr' "Counterfactual A: all developing lives affected in `yr'"
}
* Counterfactual B: based on 1996-2001 rates
forvalues yr = 2002/2015 {
	egen all_devxaff_cf9601_`yr' = sum(xaff_cf9601_`yr') if CountryCode == "ROW" | CountryCode == "CHN" | CountryCode=="IND" | CountryCode=="SSA"
	sort all_devxaff_cf9601_`yr'
	carryforward all_devxaff_cf9601_`yr', replace
	label variable all_devxaff_cf9601_`yr' "Counterfactual B: all developing lives affected in `yr'"
}

** 2. Lives affected in rest of world + SSA ("developing ex. China and India")
* Counterfactual A: based on 1991-2000 rates
forvalues yr = 2001/2015 {
	egen all_devxCIaff_cf9100_`yr' = sum(xaff_cf9100_`yr') if CountryCode == "ROW" | CountryCode == "SSA"
	sort all_devxCIaff_cf9100_`yr'
	carryforward all_devxCIaff_cf9100_`yr', replace
	label variable all_devxCIaff_cf9100_`yr' "Counterfactual A: developing x CI lives affected in `yr'"
}
* Counterfactual B: based on 1996-2001 rates
forvalues yr = 2002/2015 {
	egen all_devxCIaff_cf9601_`yr' = sum(xaff_cf9601_`yr') if CountryCode == "ROW" | CountryCode == "SSA"
	sort all_devxCIaff_cf9601_`yr'
	carryforward all_devxCIaff_cf9601_`yr', replace
	label variable all_devxCIaff_cf9601_`yr' "Counterfactual B: developing x CI lives affected in `yr'"
}
save "undernourishment_AG_adjusted.dta", replace

** 3. Build the "All developing" (LMY) row.
*     The ROW row is reused as a carrier: it is relabelled and its lives-affected
*     values are overwritten with the all-developing totals.
keep if CountryCode == "ROW"
keep CountryName CountryCode all_devxaff* xaff_cf*
replace CountryCode = "LMY"
replace CountryName = "All developing"
forvalues yr = 2001/2015 {
	replace xaff_cf9100_`yr' = all_devxaff_cf9100_`yr'
}
forvalues yr = 2002/2015 {
	replace xaff_cf9601_`yr' = all_devxaff_cf9601_`yr'
}
save "undernourishment_AG_dev_adjusted.dta", replace

** 4. Build the "Developing world ex. China & India" (DEVX) row the same way,
*     starting again from the adjusted file.
use "undernourishment_AG_adjusted.dta", clear
keep if CountryCode == "ROW"
keep CountryName CountryCode all_devxCIaff* xaff_cf*
replace CountryCode = "DEVX"
replace CountryName = "Developing world X China & India"
forvalues yr = 2001/2015 {
	replace xaff_cf9100_`yr' = all_devxCIaff_cf9100_`yr'
}
forvalues yr = 2002/2015 {
	replace xaff_cf9601_`yr' = all_devxCIaff_cf9601_`yr'
}
save "undernourishment_AG_devXCI_adjusted.dta", replace

** 5. Final file: DEVX row (in memory) + the other aggregates + the LMY row.
*     The helper totals are dropped before saving.
append using "undernourishment_AG_preanalysis.dta"
append using "undernourishment_AG_dev_adjusted.dta"
drop all_devx*

save "undernourishment_AG_analysis.dta", replace

