* Drop whatever dataset is currently in memory before the first -use-
clear
* Disable -more- paging so the do-file runs start to finish without pausing
set more off

****************************************************************
****************************************************************
*** Data includes 193 UN member states, uses World Bank regions and 
***         income group classifications as of 2016. 
***
***	a. calculate rate of change from 2005-2015
*** b. use rate of change to extrapolate progress out to 2030
*** c. check if current trajectory is on-track for SDG by 2030
*** d. for those off-track, calculate rate of progress required to meet goal
****************************************************************
****************************************************************

****************************************************************
* 1. Maternal Mortality 
****************************************************************
* Load the cleaned maternal mortality file; the steps below read mmr_n2005 and mmr_n2015
use "SDGmmr_clean.dta", clear

* a. Average annual proportional rate of reduction between 2005 and 2015
gen mmr_rp0515 = 1- (mmr_n2015/mmr_n2005)^(1/10)

* b. Compound the 2005-15 rate forward from the 2015 level, one variable per year 2015-2030
forvalues yr = 2015/2030 {
	gen mmr_ex0515_`yr' = mmr_n2015*((1-mmr_rp0515)^(`yr'-2015))
	label variable mmr_ex0515_`yr' "MMR `yr' projection from 2005-15 rates"
}

* c. 0/1 flags for being at or below the target of 70: projected for 2030, and observed in 2015
*    (left missing when the underlying value is missing)
gen mmr_sdgyes2030_0515 = (mmr_ex0515_2030<=70) if mmr_ex0515_2030!=.
gen mmr_sdgyes2015 = (mmr_n2015<=70) if mmr_n2015!=.

* d. Annual proportional rate of reduction needed to move from the 2015 level to 70 in 15 years
gen mmr_rpSDGneed = 1 - (70/mmr_n2015)^(1/15)

* The formula above would ask countries already at/below 70 to move in the opposite direction.
* Set the needed rate to 0 when either
*   - the current trajectory already ends at or below 70 in 2030, or
*   - the 2015 value is at or below 70 (even if the trajectory drifts above 70 by 2030):
*     here "needed" means holding the 2015 value, i.e. not regressing further
replace mmr_rpSDGneed = 0 if (mmr_ex0515_2030 <=70 & mmr_ex0515_2030!=.) | mmr_n2015 <=70

* Percentage-point speed-up required: needed rate minus observed rate, floored at 0,
* and 0 for every country whose trajectory already reaches 70 by 2030
gen mmr_pctpSDGneed = mmr_rpSDGneed - mmr_rp0515
replace mmr_pctpSDGneed = 0 if mmr_pctpSDGneed < 0 | mmr_ex0515_2030 <=70
label variable mmr_pctpSDGneed "pct. point increase required to reach 70 by 2030"


save "SDGmmr_ex2030.dta", replace


****************************************************************
*2. U5MR
****************************************************************
* Load the cleaned under-5 mortality file; the steps below read u5mr_n2005 and u5mr_n2015
use "SDGu5mr_clean.dta", clear

* a. Average annual proportional rate of reduction between 2005 and 2015
gen u5mr_rp0515 = 1- (u5mr_n2015/u5mr_n2005)^(1/10)

* b. Compound the 2005-15 rate forward from the 2015 level, one variable per year 2015-2030
forvalues yr = 2015/2030 {
	gen u5mr_ex0515_`yr' = u5mr_n2015*((1-u5mr_rp0515)^(`yr'-2015))
	label variable u5mr_ex0515_`yr' "U5MR `yr' projection from 2005-15 rates"
}

* c. 0/1 flags for being at or below the target of 25: projected for 2030, and observed in 2015
*    (left missing when the underlying value is missing)
gen u5mr_sdgyes2030_0515 = (u5mr_ex0515_2030<=25) if u5mr_ex0515_2030!=.
gen u5mr_sdgyes2015 = (u5mr_n2015<=25) if u5mr_n2015!=.

* d. Annual proportional rate of reduction needed to move from the 2015 level to 25 in 15 years
gen u5mr_rpSDGneed = 1 - (25/u5mr_n2015)^(1/15)

* The formula above would ask countries already at/below 25 to move in the opposite direction.
* Set the needed rate to 0 when either
*   - the current trajectory already ends at or below 25 in 2030, or
*   - the 2015 value is at or below 25 (even if the trajectory drifts above 25 by 2030):
*     here "needed" means holding the 2015 value, i.e. not regressing further
replace u5mr_rpSDGneed = 0 if (u5mr_ex0515_2030 <=25 & u5mr_ex0515_2030!=.) | u5mr_n2015 <=25

* Percentage-point speed-up required: needed rate minus observed rate, floored at 0,
* and 0 for every country whose trajectory already reaches 25 by 2030
gen u5mr_pctpSDGneed = u5mr_rpSDGneed - u5mr_rp0515
replace u5mr_pctpSDGneed = 0 if u5mr_pctpSDGneed < 0 | u5mr_ex0515_2030 <=25
label variable u5mr_pctpSDGneed "pct. point increase required to reach 25 by 2030"

save "SDGu5mr_ex2030.dta", replace


****************************************************************
*3. NMR
****************************************************************
* Load the cleaned neonatal mortality file; the steps below read nmr_n2005 and nmr_n2015
use "SDGnmr_clean.dta", clear

* a. Average annual proportional rate of reduction between 2005 and 2015
gen nmr_rp0515 = 1- (nmr_n2015/nmr_n2005)^(1/10)

* b. Compound the 2005-15 rate forward from the 2015 level, one variable per year 2015-2030
forvalues yr = 2015/2030 {
	gen nmr_ex0515_`yr' = nmr_n2015*((1-nmr_rp0515)^(`yr'-2015))
	label variable nmr_ex0515_`yr' "NMR `yr' projection from 2005-15 rates"
}

* c. 0/1 flags for being at or below the target of 12: projected for 2030, and observed in 2015
*    (left missing when the underlying value is missing)
gen nmr_sdgyes2030_0515 = (nmr_ex0515_2030<=12) if nmr_ex0515_2030!=.
gen nmr_sdgyes2015 = (nmr_n2015<=12) if nmr_n2015!=.

* d. Annual proportional rate of reduction needed to move from the 2015 level to 12 in 15 years
gen nmr_rpSDGneed = 1 - (12/nmr_n2015)^(1/15)

* The formula above would ask countries already at/below 12 to move in the opposite direction.
* Set the needed rate to 0 when either
*   - the current trajectory already ends at or below 12 in 2030, or
*   - the 2015 value is at or below 12 (even if the trajectory drifts above 12 by 2030):
*     here "needed" means holding the 2015 value, i.e. not regressing further
replace nmr_rpSDGneed = 0 if (nmr_ex0515_2030 <=12 & nmr_ex0515_2030!=.) | nmr_n2015 <=12

* Percentage-point speed-up required: needed rate minus observed rate, floored at 0,
* and 0 for every country whose trajectory already reaches 12 by 2030
gen nmr_pctpSDGneed = nmr_rpSDGneed - nmr_rp0515
replace nmr_pctpSDGneed = 0 if nmr_pctpSDGneed < 0 | nmr_ex0515_2030 <=12
label variable nmr_pctpSDGneed "pct. point increase required to reach 12 by 2030"

save "SDGnmr_ex2030.dta", replace

****************************************************************
*4. Water and Sanitation
****************************************************************
* Run the same linear (percentage-point) projection for each indicator prefix.
* Throughout, the local `gap' accumulates the condition "every later year is missing",
* so each fallback -replace- only touches rows whose more recent observations are all absent.
foreach p in water_ sanitation_ {

	use "SDGimproved`p'pct_clean", clear

	* Prefix the yearly columns n1990..n2015 with the indicator name
	forvalues yr = 1990/2015 {
		rename n`yr' `p'n`yr'
	}

	* a. Percentage-point rate of progress per year from 2005 to the latest
	*    available year (2015, falling back year by year down to 2010)
	gen `p'rp = (`p'n2015-`p'n2005)/10
	local gap "`p'n2015==."
	forvalues last = 2014(-1)2010 {
		replace `p'rp = (`p'n`last'-`p'n2005)/(`last'-2005) if `gap'
		local gap "`gap' & `p'n`last'==."
	}

	*    When 2005 itself is missing, measure from 2004 instead (same end-year fallback)
	local gap "`p'n2005==."
	forvalues last = 2015(-1)2010 {
		replace `p'rp = (`p'n`last'-`p'n2004)/(`last'-2004) if `gap'
		local gap "`gap' & `p'n`last'==."
	}

	* b. "Full 2015" level: the 2015 value, or the most recent one back to 2010 when 2015 is missing
	gen `p'fulln2015 = `p'n2015
	local gap "`p'n2015==."
	forvalues last = 2014(-1)2010 {
		replace `p'fulln2015 = `p'n`last' if `gap'
		local gap "`gap' & `p'n`last'==."
	}

	* c. Linear extrapolation for 2016-2030 from the latest available year, capped at 1 (= 100%)
	forvalues target = 2016/2030 {
		gen `p'ex_`target' = `p'n2015+(`p'rp*(`target'-2015))
		local gap "`p'n2015==."
		forvalues last = 2014(-1)2010 {
			replace `p'ex_`target' = `p'n`last'+(`p'rp*(`target'-`last')) if `gap'
			local gap "`gap' & `p'n`last'==."
		}
		label variable `p'ex_`target' "`target' projection from 2005-~15 rates"
		replace `p'ex_`target' = 1 if `p'ex_`target' > 1 & `p'ex_`target'!=.
	}

	* d. 0/1 flags for reaching 100%: projected for 2030, and at the "full 2015" level
	gen `p'sdgyes2030 = (`p'ex_2030>=1) if `p'ex_2030!=.
	gen `p'sdgyes2015 = (`p'fulln2015>=1) if `p'fulln2015!=.

	* e. Percentage-point gain per year needed to reach 1 by 2030,
	*    counted from the latest available year (15 years from 2015, ..., 20 from 2010)
	gen `p'rpSDGneed = (1-`p'n2015)/15
	local gap "`p'n2015==."
	forvalues last = 2014(-1)2010 {
		replace `p'rpSDGneed = (1- `p'n`last')/(2030-`last') if `gap'
		local gap "`gap' & `p'n`last'==."
	}

	* Countries already at 100% only need to hold their level
	replace `p'rpSDGneed = 0 if `p'fulln2015 ==1

	* Percentage-point speed-up required: needed rate minus observed rate, floored at 0,
	* and 0 when the (capped) 2030 projection already equals 1
	gen `p'pctpSDGneed = `p'rpSDGneed - `p'rp
	replace `p'pctpSDGneed = 0 if `p'pctpSDGneed < 0 | `p'ex_2030 ==1
	label variable `p'pctpSDGneed "pct. point increase required to reach 100% by 2030"

	save "SDGimproved`p'pct_ex2030", replace

}

****************************************************************
*5. Undernourishment
****************************************************************
use "SDGundernourishment_clean.dta", clear

* a. Flip every n* column from share undernourished to share nourished (1 - value)
*    and give it the nour_ prefix
foreach v of varlist n* {
	replace `v' = 1-`v'
	rename `v' nour_`v'
}

* b. Percentage-point rate of progress per year, 2005-2015
gen nour_rp0515 = (nour_n2015-nour_n2005)/10

* c. Linear extrapolation for 2016-2030 from the 2015 level, capped at 1 (= 100% nourished)
forvalues yr = 2016/2030 {
	gen nour_ex0515_`yr' = nour_n2015+(nour_rp0515*(`yr'-2015))
	label variable nour_ex0515_`yr' "`yr' projection from 2005-15 rates"
	replace nour_ex0515_`yr' = 1 if nour_ex0515_`yr' > 1 & nour_ex0515_`yr'!=.
}

* d. 0/1 flag for reaching the target level by 2030 (missing when the projection is missing)
gen nour_sdgyes2030_0515 = (nour_ex0515_2030>=1) if nour_ex0515_2030!=.

* The source data is capped at 5%: the World Bank reports such countries as "<5%" and the
* observation is recorded as ".0500000119". No trend can be extrapolated for them, so they
* are assumed to reach 0% undernourishment by 2030 and are flagged as on-track.
* (.94999 = 1 - .05 with a small tolerance on the nourished scale.)
replace nour_sdgyes2030_0515 = 1 if nour_n2015>=.94999 & nour_n2015!=. 

save "SDGundernourishment_ex2030.dta", replace


****************************************************************
*6. Primary school completion rate 
****************************************************************
* Linear (percentage-point) projection for the primary completion rate.
* Throughout, the local `gap' accumulates the condition "every later year is missing",
* so each fallback -replace- only touches rows whose more recent observations are all absent.
foreach p in pcr_ {

	use "SDG`p'clean", clear

	* Cap every yearly value n1985..n2015 at 1 (= 100%), then prefix the column with the indicator name
	forvalues yr = 1985/2015 {
		replace n`yr' = 1 if n`yr'>1 & n`yr'!=.
		rename n`yr' `p'n`yr'
	}

	* a. Percentage-point rate of progress per year from 2005 to the latest
	*    available year (2015, falling back year by year down to 2010)
	gen `p'rp = (`p'n2015-`p'n2005)/10
	local gap "`p'n2015==."
	forvalues last = 2014(-1)2010 {
		replace `p'rp = (`p'n`last'-`p'n2005)/(`last'-2005) if `gap'
		local gap "`gap' & `p'n`last'==."
	}

	*    When 2005 itself is missing, measure from 2004 instead (same end-year fallback)
	local gap "`p'n2005==."
	forvalues last = 2015(-1)2010 {
		replace `p'rp = (`p'n`last'-`p'n2004)/(`last'-2004) if `gap'
		local gap "`gap' & `p'n`last'==."
	}

	* b. "Full 2015" level: the 2015 value, or the most recent one back to 2010 when 2015 is missing
	gen `p'fulln2015 = `p'n2015
	local gap "`p'n2015==."
	forvalues last = 2014(-1)2010 {
		replace `p'fulln2015 = `p'n`last' if `gap'
		local gap "`gap' & `p'n`last'==."
	}

	* c. Linear extrapolation for 2016-2030 from the latest available year, capped at 1 (= 100%)
	forvalues target = 2016/2030 {
		gen `p'ex_`target' = `p'n2015+(`p'rp*(`target'-2015))
		local gap "`p'n2015==."
		forvalues last = 2014(-1)2010 {
			replace `p'ex_`target' = `p'n`last'+(`p'rp*(`target'-`last')) if `gap'
			local gap "`gap' & `p'n`last'==."
		}
		label variable `p'ex_`target' "`target' projection from 2005-~15 rates"
		replace `p'ex_`target' = 1 if `p'ex_`target' > 1 & `p'ex_`target'!=.
	}

	* d. 0/1 flags for reaching 100%: projected for 2030, and at the "full 2015" level
	gen `p'sdgyes2030 = (`p'ex_2030>=1) if `p'ex_2030!=.
	gen `p'sdgyes2015 = (`p'fulln2015>=1) if `p'fulln2015!=.


	save "SDG`p'ex2030", replace
}


****************************************************************
*7. Merge files 
****************************************************************

* Start from the U5MR results and attach every other indicator file, one row per CountryCode.
* -nogenerate- suppresses the _merge marker, so there is nothing to drop between merges.
use "SDGu5mr_ex2030.dta", clear
foreach src in "SDGmmr_ex2030.dta" "SDGnmr_ex2030.dta" "SDGimprovedwater_pct_ex2030.dta" "SDGimprovedsanitation_pct_ex2030.dta" "SDGundernourishment_ex2030.dta" "SDGpcr_ex2030.dta" {
	merge 1:1 CountryCode using "`src'", nogenerate
}

* Full merged dataset, including all rate-of-progress variables
save "fulldataset_SDG.dta", replace

* Table version: identifiers first, and every variable whose name contains "rp"
* (observed and needed rates of progress) removed
order CountryName CountryCode Region IncomeGroup
drop *rp*

save "SDG2030_table.dta", replace

****************************************************************
* For Table 1: Number of countries on-track by target
**use the following variables: nour_sdgyes2030_0515; mmr_sdgyes2030_0515; u5mr_sdgyes2030_0515; nmr_sdgyes2030_0515; water_sdgyes2030; sanitation_sdgyes2030; pcr_sdgyes2030

* For Table 2: Country-level 2030 trajectories
* Keep only the 2015 level and the 2030 projection for each indicator, in table column order
local table2vars CountryName Region IncomeGroup nour_n2015 nour_ex0515_2030 mmr_n2015 mmr_ex0515_2030 u5mr_n2015 u5mr_ex0515_2030 nmr_n2015 nmr_ex0515_2030 pcr_n2015 pcr_ex_2030 water_n2015 water_ex_2030 sanitation_n2015 sanitation_ex_2030
keep `table2vars'
order `table2vars'
sort Region IncomeGroup CountryName

 * Make adjustments to undernourishment to account for lack of measurement below 5% undernourishment
replace nour_n2015 = 1-nour_n2015 //return data to showing undernourishment instead of % nourished
replace nour_ex0515_2030 = 1-nour_ex0515_2030

 * Flag the capped ("<5%") observations numerically BEFORE converting to string.
 * Matching the printed text ".0500000119" after -tostring- only works when the value is a
 * float that went through the 1-x / 1-x round trip and is rendered with the default format;
 * with double storage or another format the match silently fails and nothing is relabelled.
 * The tolerance is the same one used for the on-track flag (nourished >= .94999, i.e.
 * undernourishment <= .05001). Missing values compare as larger than any number, so they
 * are not flagged.
tempvar capped
gen byte `capped' = nour_n2015 <= .05001

tostring nour_n2015 nour_ex0515_2030, replace force
replace nour_n2015 = "<5" if `capped' //World Bank uses the capped value to denote country has <5% undernourishment
replace nour_ex0515_2030 = "~0" if `capped' // Assume countries labeled as "<5%" in 2015 reach "~0%" by 2030 because we cannot extrapolate
drop `capped'

* For Figure 2 (Maps): Comparing 2030 trajectories for water and sanitation
** use 1-water_ex_2030 and 1-sanitation_ex_2030
