* Start with no dataset in memory
clear
* Turn off the -more- pause so the do-file runs through without prompting
set more off
* NOTE(review): machine-specific working directory. The relative input and output
* paths used later in this file resolve against it; edit before running elsewhere.
cd "V:\krasmussen\M\Data\SDG trajectories\data\stata\Final dofiles\For publication\"


********************************************************************************
********************************************************************************
** HOW MANY PEOPLE WILL THE WORLD LEAVE BEHIND?
** Assessing current trajectories on the Sustainable Development Goals

* Analysis

* 1. Establish 2030 trajectories and identify SDG status on BAU
	* A. Classify indicator denominator & add data ceiling
	* B. Rate of progress
	* C. Identify target type, set SDG end target, and identify "2015" value for distance and relative targets
	* D. 2030 Trajectory
	* E. Establish on/off track in 2030 on current trajectory
	* F. Extrapolate trajectory required for country to reach SDG by end year
	* G. Identify Gaps between BAU and SDG in 2015 and final target year
	
* 2. People left behind
	* A. Match indicator with relevant population category
	* B. Calculate lives affected in each year under various scenarios
	* C. Create variable of relevant population in SDG final year to use in graphs

* Homi Kharas * John W. McArthur * Krista Rasmussen
* September 11, 2018
********************************************************************************
********************************************************************************

** Code runs for all 30 assessable outcome targets + 3 proxy targets, as 
** identified in Figure 1, regardless of data availability

** Note: The following indicators have insufficient data for inclusion in paper:
 ** 1.2.1, 1.3.1, 1.4.2, 2.3, 3.8.1, 4.6.1, 5.3.1, 5.3.2, 5.6.1, 8.7.1, 10.1.1
 ** 11.1.1, 16.2.2

********************************************************************************
********************************************************************************
** 1. Establish 2030 trajectories and identify SDG status on BAU

	* A. Classify indicator denominator & add data ceiling
	* B. Rate of progress
	* C. Identify target type, set SDG end target, and identify "2015" value for distance and relative targets
	* D. 2030 Trajectory
	* E. Establish on/off track in 2030 on current trajectory
	* F. Extrapolate trajectory required for country to reach SDG by end year
	* G. Identify Gaps between BAU and SDG in 2015 and final target year
********************************************************************************
********************************************************************************

use "input\indicator_data.dta", clear

** Flag rows that have at least one observation in 2012 - 2017
 * Variables are reordered first; presumably so that n2012-n2017 below is a
 * contiguous range in the dataset (TODO confirm against the input file layout)
order CountryCode CountryName IndicatorRef n* ex_rp*, sequential
 * With the missing option the row total is missing only when every year is missing
egen sum2012_2017 = rowtotal(n2012-n2017), missing
generate count2012_2017 = 1 if !missing(sum2012_2017)
label variable count2012_2017 "Data available 2012-2017"

 
 
**********************************************
* A. Classify indicator denominator & add data ceiling
**********************************************
generate den_type = .
label variable den_type "data denominator"

 * Percent
replace den_type = 100 if inlist(IndicatorRef, "1.1.1", "1.2.1", "1.3.1a", "1.3.1b", "1.4.2", "2.1.1", "2.2.1") ///
	| inlist(IndicatorRef, "2.2.2a", "2.2.2b", "3.7.1", "4.1.1", "4.2.2", "4.6.1") ///
	| inlist(IndicatorRef, "5.2.1", "5.3.1", "5.3.2", "5.5.1", "5.6.1", "6.1.1", "6.2.1") ///
	| inlist(IndicatorRef, "7.1.1", "8.7.1", "11.1.1", "11.6.2", "16.9.1")

 * Per 100,000
replace den_type = 100000 if inlist(IndicatorRef, "3.1.1", "3.3.2", "3.3.4", "3.4.1", "3.4.2", "3.6.1", "16.2.2", "16.1.1")

 * Per 1,000
replace den_type = 1000 if inlist(IndicatorRef, "3.2.1", "3.3.1", "3.3.3")

 * Gender Ratio
replace den_type = 1 if IndicatorRef == "5.1.1"

** Add data ceiling: observed values may not exceed the denominator
forvalues yr = 2000/2017 {
	replace n`yr' = den_type if n`yr' > den_type & n`yr' != . & den_type != .
}

 * Special Case: Women in parliament (5.5.1); rescale to 0-100% with 50% women = 100%
 * Each year is rescaled and then capped at 100; years are independent of each other
forvalues yr = 2000/2017 {
	replace n`yr' = (n`yr' / 50) * 100 if n`yr' != . & IndicatorRef == "5.5.1"
	replace n`yr' = 100 if n`yr' > 100 & n`yr' != . & IndicatorRef == "5.5.1"
}

 * No ceiling required: these denominators are assigned only after the ceiling
 * step above, so the observed values of these indicators are left uncapped
replace den_type = 1 if inlist(IndicatorRef, "4.5.1", "10.1.1a")
replace den_type = 100 if IndicatorRef == "3.3.5"



**********************************************
* B. Rate of progress
**********************************************
* Empty containers that the year-pair search below fills in
foreach v in rp initial final final_value initial_value final_initial {
	generate `v' = .
}
label variable rp "Recent rate of progress"
label variable initial "Initial year in rate of progress calculation, 2000-2012"
label variable final "Final year in rate of progress calculation, 2012-2017"
label variable final_value "Value in final year in rate of progress calculation"
label variable initial_value "Value in initial year in rate of progress calculation"
label variable final_initial "Difference in years between final and initial value"

**Distinguish proportional (p), linear (l), and linear fit (linear_fit) rates of progress calculation
 * Linear is the default; the lists below are mutually exclusive overrides
generate rp_type = "l"
replace rp_type = "p" if inlist(IndicatorRef, "3.1.1", "3.2.1", "3.3.1", "3.3.2", "3.3.4", "3.3.5", "3.4.1", "3.4.2", "3.6.1")
replace rp_type = "source" if IndicatorRef == "1.1.1"
replace rp_type = "linear_fit" if inlist(IndicatorRef, "3.3.3", "4.1.1", "4.2.2", "5.5.1", "16.1.1")
label variable rp_type "Rate of progress calc: p = proportional; l = linear; linear_fit = linear fit by regression; source = pre-calculated"

  **Identify initial and final year for rate of progress calculation
   * At least five years apart, best case is 2017 & 2010
   * Final year possible range: 2012-2017
   * Initial year possible range: 2000-2012
* Search for the pair of years used in the rate of progress calculation.
* The outer loop walks candidate final years from 2017 down to 2012; the inner
* loop walks candidate initial years in the order listed in lista (2010, 2011,
* 2012, then 2009 back to 2000). Note that the loop macros final and initial
* share their names with the dataset variables final and initial: the macro
* forms select the year columns, the bare names are the dataset variables.
* Updates to initial, initial_value and final_initial stop once rp is
* non-missing, so the first pair that produces a rate is the one recorded.
forvalues final = 2017(-1)2012{
local lista "2010 2011 2012 2009 2008 2007 2006 2005 2004 2003 2002 2001 2000"
	foreach initial of local lista {
		* Final year: set once, to the first (most recent) candidate year that has data
		replace final = `final' if n`final' != . & final == .
		* Initial year: overwritten by each candidate that has data until a rate exists
		replace initial = `initial' if n`initial' != . & rp == .
		* Final value: first non-missing value met while walking final years downward
		replace final_value = n`final' if final_value == .
		* Initial value tracks the current candidate column (may be missing) until a rate exists
		replace initial_value = n`initial' if rp == .
		* Gap in years, computed from the dataset variables final and initial
		replace final_initial = final - initial if rp == .
		* 1. Proportional rate of progress (compound annual rate), only when the gap is at least 5 years
		replace rp = ((n`final'/n`initial')^(1/(`final'-`initial'))) - 1 if rp_type == "p" & final_initial >= 5 & final_initial != . & rp == .
		* Replace proportional rate of progress with zero if initial value is zero
		* (the ratio above is undefined in that case; this line is not guarded by rp == .)
		replace rp = 0 if n`initial' == 0 & initial == `initial' & n`final' != . & final_initial >= 5 & final_initial != . & rp_type == "p"
		* 2. Linear rate of progress (average annual change); also computed for linear_fit
		*    indicators, whose rate is replaced by the regression slope later in the file
		replace rp = (n`final' - n`initial') / (`final' - `initial') if inlist(rp_type, "l", "linear_fit") & final_initial >= 5 & final_initial != . & rp == .
	}
}
 * Remove initial values if rate of progress cannot be calculated
 * (final and final_value are kept even when no rate exists)
replace initial = . if rp==.
replace initial_value = . if rp==.

 * 3. Linear fit
 ** Indicators with large fluctuations (3.3.3, 4.1.1, 4.2.2, 5.5.1, 16.1.1)
* Long format: one row per country, indicator and year, with the value in n
reshape long n, i(CountryCode IndicatorRef) j(year)

gen predicted_y = .
 label variable predicted_y "predicted values from linear fit reg"
gen linear_fit_rp = .
 label variable linear_fit_rp "Beta from linear fit reg"

* One regression per indicator and country. Only series that already have a
* rate of progress (rp != .) are fitted, i.e. series that met the data
* availability rules of the year-pair search above. The estimation window
* depends on the initial year found by that search:
*   initial year 2005 or later -> fit on years after 2004
*   initial year before 2005   -> fit on years from 2000 onward
levelsof IndicatorRef if rp_type == "linear_fit", local(levels)
foreach i of local levels {

	** For countries where initial year of data in RP calculation > 2004
	levelsof CountryCode if IndicatorRef == "`i'" & rp != . & initial > 2004 & initial != ., local(cc)
	foreach c of local cc {
		* 1. Regress indicator value (n) on year, by indicator and country
		reg n year if year > 2004 & rp != . & IndicatorRef == "`i'" & CountryCode == "`c'"

		* 2. Predict values by indicator and country; predictions are also produced
		*    for years in the window that have no observation
		predict yhat if year > 2004 & rp != . & IndicatorRef == "`i'" & CountryCode == "`c'"
		replace predicted_y = yhat if year > 2004 & rp != . & IndicatorRef == "`i'" & CountryCode == "`c'"
		drop yhat

		* 3. Preserve the coefficient on year; written to every year of the series
		*    so that it is constant within country-indicator for the reshape back to wide
		replace linear_fit_rp = _b[year] if rp != . & IndicatorRef == "`i'" & CountryCode == "`c'"
	}

	** For countries where initial year of data in RP calculation < 2005
	levelsof CountryCode if IndicatorRef == "`i'" & rp != . & initial < 2005 & initial != ., local(lcc)
	foreach lc of local lcc {
		* 1. Regress indicator value (n) on year, by indicator and country
		reg n year if year >= 2000 & rp != . & IndicatorRef == "`i'" & CountryCode == "`lc'"

		* 2. Predict values by indicator and country
		predict yhat if year >= 2000 & rp != . & IndicatorRef == "`i'" & CountryCode == "`lc'"
		replace predicted_y = yhat if year >= 2000 & rp != . & IndicatorRef == "`i'" & CountryCode == "`lc'"
		drop yhat

		* 3. Preserve the coefficient on year
		replace linear_fit_rp = _b[year] if rp != . & IndicatorRef == "`i'" & CountryCode == "`lc'"
	}
}

* Checkpoint the regression results and reload them.
* -use- accepts the options clear and nolabel; the previous "replace" option is
* not part of its syntax, so clear is used to discard the data in memory.
save "output\linear_fit_rp.dta", replace

use "output\linear_fit_rp.dta", clear

 * 4. Replace RP with beta for relevant indicators
replace rp = linear_fit_rp if rp_type == "linear_fit"

 * 5. Add data floor and ceiling on predicted values
 * Clamp into the range 0 to den_type in one step; a missing den_type leaves the
 * upper side unbounded because min() ignores missing arguments
replace predicted_y = min(max(predicted_y, 0), den_type) if predicted_y != . & rp_type == "linear_fit"

 * Back to one row per country-indicator, with n and predicted_y spread by year
reshape wide n predicted_y, i(CountryCode IndicatorRef) j(year)

 * 6. Replace final year and final year value with 2017 if linear fit can be performed and missing if not
  * 5.5.1 - women in government, use actual data (not predicted)
replace final = cond(predicted_y2017 != ., 2017, .) if rp_type == "linear_fit" & IndicatorRef != "5.5.1"
replace final_value = predicted_y2017 if rp_type == "linear_fit" & IndicatorRef != "5.5.1"

order CountryCode CountryName IndicatorRef n2*, sequential


 ** Special case: social protection (1.3.1) 
 * Collapses the two source series 1.3.1a and 1.3.1b into a single 1.3.1 row per country
 * 1. If 1.3.1a (ASPIRE) has trajectory data, keep rate of progress and final/initial value
 * 2. If 1.3.1a has no trajectory data and 1.3.1b (ILO) has more recent final data point, use 1.3.1b
 * 3. If 1.3.1a has no data, use 1.3.1b
 * Remember the source series (a or b) before both are renamed to 1.3.1
 gen ind_1_3_1 = "a" if IndicatorRef == "1.3.1a"
 replace ind_1_3_1 = "b" if IndicatorRef == "1.3.1b"
 replace IndicatorRef = "1.3.1" if inlist(IndicatorRef, "1.3.1a", "1.3.1b")
  
  * 1. If 1.3.1a (ASPIRE) has trajectory data, keep RP and final/initial value
  * Rows are sorted by descending rp within country-indicator and rp is carried
  * forward into rows where it is missing; the b row is then dropped whenever rp
  * is non-missing after the carryforward.
  * NOTE(review): carryforward is a user-written command; the by-group covers
  * every indicator, not only 1.3.1 - presumably harmless because other
  * indicators have one row per country (confirm).
  * NOTE(review): if only the b row had a rate, the a row would inherit it here
  * and the b row would still be dropped - confirm that b never has a rate.
  gsort CountryCode IndicatorRef -rp
  by CountryCode IndicatorRef: carryforward rp, replace
  drop if rp != . & ind_1_3_1 == "b" & IndicatorRef == "1.3.1"
 
  * 2. If 1.3.1a has no trajectory data and 1.3.1b (ILO) has more recent final data point, use 1.3.1b
  * final_valueB copies the final value of the b row to all 1.3.1 rows of the country;
  * the a row is dropped when it has no rate and the b row has a final value.
  * NOTE(review): the code checks only that b has a final value; it does not
  * compare the recency of the a and b data points.
  gen final_valueB = final_value if IndicatorRef == "1.3.1" & ind_1_3_1 == "b"
  sort CountryCode IndicatorRef final_valueB
  by CountryCode IndicatorRef: carryforward final_valueB, replace
  drop if IndicatorRef == "1.3.1" & final_valueB != . & rp == . & ind_1_3_1 == "a"
  
  * 3. If 1.3.1a has no data, use 1.3.1b
  * As written, this drops b rows that have neither a final value nor a rate
  drop if IndicatorRef == "1.3.1" & final_valueB == . & rp == . & ind_1_3_1 == "b"
 
 * Give the merged indicator a name that covers both source definitions
replace indicatorname = "Proportion of poor population covered by social protection floors/systems OR Proportion of the poorest quintile population covered by social assistance programs" if IndicatorRef == "1.3.1"
  * Remove the helper variables
  drop final_valueB ind_1_3_1 
  
  
 ** Special case: birth registration (16.9.1)
 * Countries at 100% only have levels data, so no rate could be computed above;
 * a rate of zero (constant trajectory) is assumed for them
 replace rp = 0 if missing(rp) & final_value == 100 & IndicatorRef == "16.9.1"


* Checkpoint: rates of progress for all indicators
save "output\BC_allsources_full_rp.dta", replace



**********************************************
* C. Identify target type, set SDG end target, and identify "2015" value for distance and relative targets
**********************************************
use "output\BC_allsources_full_rp.dta", clear

*** Identify target type
generate target_type = ""
label variable target_type "target type: g = greater than; l = less than; 2020 = 2020 end"

 ** Greater than targets
foreach ref in 1.3.1 1.4.2 3.7.1 4.1.1 4.2.2 4.6.1 5.1.1 5.5.1 5.6.1 6.1.1 6.2.1 7.1.1 10.1.1a 16.9.1 {
	replace target_type = "g" if IndicatorRef == "`ref'"
}

 ** Less than targets
foreach ref in 1.1.1 1.2.1 2.1.1 2.2.1 2.2.2a 2.2.2b 3.1.1 3.2.1 3.3.1 3.3.2 3.3.3 3.3.4 3.3.5 3.4.1 3.4.2 5.2.1 5.3.1 5.3.2 8.7.1 11.1.1 11.6.2 16.1.1 16.2.2 {
	replace target_type = "l" if IndicatorRef == "`ref'"
}

 ** 2020 targets
replace target_type = "2020_l" if IndicatorRef == "3.6.1"

 ** Special case
replace target_type = "special" if IndicatorRef == "4.5.1"

** Identify value in 2015 to use for setting relative targets and establishing distance required to reduce problem by 50%
** The rules below are applied in order; a later rule overrides an earlier one for the rows it matches

* A. Use 2015 if available
gen final2015 = n2015
 * B. If final year of data is less than 2015, use most recent as final
 replace final2015 = final_value if final < 2015
 * C. If final year of data is 2016 and 2015 is missing, use the average of 2014 and 2016
 replace final2015 = (n2014 + n2016)/2 if final == 2016 & n2015 == . & n2014!=.
 * D. If final year of data is 2017 and 2015 is missing, use 2014
 replace final2015 = n2014 if final == 2017 & n2015 == . & n2014 != .
 * E. If final year of data is 2016 or 2017, and 2015 and 2014 are missing, use 2016 if available then 2017
 *    (rows whose source is GBD are excluded here and handled in E2)
 replace final2015 = n2016 if final > 2015 & final != . & n2015 == . & n2014 == . & source != "GBD"
 replace final2015 = n2017 if final == 2017 & final != . & n2015 == . & n2014 == . & n2016 == . & source != "GBD"
	* E2. For GBD indicators, use RP to interpolate 2015 value
	*     Steps back exactly one year from final_value, for every GBD row with a
	*     proportional or linear rate type, whether or not n2015 is observed.
	*     NOTE(review): presumably the GBD final year is always 2016 - confirm.
	replace final2015 = final_value/(1 + rp)^1 if source == "GBD" & rp_type == "p"
	replace final2015 = final_value + (rp*-1) if source == "GBD" & rp_type == "l"
 
 * Special case: volatile indicators, use predicted values from linear fit regression (5.5.1 - women in government use actual)
 replace final2015 = predicted_y2015 if rp_type == "linear_fit" & IndicatorRef != "5.5.1"
 
 * Special case: Undernourishment; data hits floor, replace with zero
 * (applies when the 2015 observation is at or below 2.501 and a rate of progress exists)
  replace final2015 = 0 if n2015 <= 2.501 & rp!=. & IndicatorRef=="2.1.1"
    
generate SDG_end = .
label variable SDG_end "SDG end target"

 ** Fixed end values shared by groups of indicators
  * 100 as final
replace SDG_end = 100 if inlist(IndicatorRef, "1.3.1", "1.4.2", "3.7.1", "4.1.1", "4.2.2", "4.6.1") ///
	| inlist(IndicatorRef, "5.5.1", "5.6.1", "6.1.1", "6.2.1", "7.1.1", "16.9.1")
  * 1 as final
replace SDG_end = 1 if inlist(IndicatorRef, "4.5.1", "5.1.1")
  * 0 as final
replace SDG_end = 0 if inlist(IndicatorRef, "2.1.1", "2.2.1", "2.2.2a", "2.2.2b", "5.2.1") ///
	| inlist(IndicatorRef, "5.3.1", "5.3.2", "8.7.1", "11.1.1", "16.2.2")

 ** End values defined relative to the 2015 value (final2015)
  * halve
replace SDG_end = final2015 / 2 if inlist(IndicatorRef, "1.2.1", "3.3.4", "3.6.1", "11.6.2", "16.1.1")
  * reduce by 1/3
replace SDG_end = final2015 - (final2015 * (1/3)) if inlist(IndicatorRef, "3.4.1", "3.4.2")
  * reduce by 90%
replace SDG_end = final2015 - (final2015 * .9) if inlist(IndicatorRef, "3.3.1", "3.3.3", "3.3.5")
  * reduce by 80%
replace SDG_end = final2015 - (final2015 * .8) if IndicatorRef == "3.3.2"

 ** Set country-level end values
replace SDG_end = 3 if IndicatorRef == "1.1.1"
replace SDG_end = 70 if IndicatorRef == "3.1.1"
replace SDG_end = 25 if IndicatorRef == "3.2.1"

 ** Target 10.1.1 "income growth of the bottom 40%... at a rate higher than the national average"
  * 10.1.1b = annual growth rate for total population: take n2017, else ex_rp2030
generate _10_1_1 = cond(n2017 != ., n2017, ex_rp2030) if IndicatorRef == "10.1.1b"
  * Spread the 10.1.1b value to every row of the same country (missing sorts last)
sort CountryCode _10_1_1
by CountryCode: carryforward _10_1_1, replace
  * 10.1.1a = annual growth rate for bottom 40%; its end target is the 10.1.1b value
replace SDG_end = _10_1_1 if IndicatorRef == "10.1.1a"
drop _10_1_1

label variable SDG_end "Value for SDG achievement"

 ** Special case: Target 3.3 "By 2030, end epidemic..." use WHO's relative target for SDG achievement by 2030
  * If country is below IHME's SDG threshold in final year of data (~2015), assume target already met (country does not need to reduce further)
  * Thresholds: 3.3.1 HIV = 0.005; 3.3.2 TB = 0.5; 3.3.3 Malaria = 0.005;
  *             3.3.4 Hep B = 0.5; 3.3.5 NTD = 0.5
generate SDGthresh_3_3 = .
replace SDGthresh_3_3 = 0.005 if inlist(IndicatorRef, "3.3.1", "3.3.3")
replace SDGthresh_3_3 = 0.5 if inlist(IndicatorRef, "3.3.2", "3.3.4", "3.3.5")
label variable SDGthresh_3_3 "SDG exclusion threshold for incidence indicators"

 ** Special case: Gender parity in education (4.5.1)
  * Assume zero rate of progress to 2030 when either the final value or the
  * initial value lies strictly inside the 0.97 - 1.03 parity band
replace rp = 0 if IndicatorRef == "4.5.1" & ///
	((final_value > 0.97 & final_value < 1.03 & final_value != .) ///
	| (initial_value > 0.97 & initial_value < 1.03 & initial_value != .))
  
  
  
**********************************************
* D. 2030 Trajectory
**********************************************

** Build the current-trajectory (BAU) series ex_rp2016 through ex_rp2030.
** Inputs: n (observations), predicted_y (linear fit), final_value, final, rp,
** rp_type, den_type and SDG_end. Rows that already carry an ex_rp value keep it
** unless a step below overrides it. No step reads another year of ex_rp, so all
** steps are applied one year at a time.
forvalues yr = 2016/2030 {

	** Fill trajectory back to 2015 - replace with actual data if observations available
	* Only 2016 and 2017 can have observations. The parentheses are required:
	* & binds tighter than |, so without them the 5.5.1 clause skips the
	* non-missing check and missing observations overwrite ex_rp for 5.5.1.
	if `yr' <= 2017 {
		replace ex_rp`yr' = n`yr' if n`yr' != . & (inlist(rp_type, "p", "l") | IndicatorRef == "5.5.1")
		replace ex_rp`yr' = predicted_y`yr' if predicted_y`yr' != . & rp_type == "linear_fit" & IndicatorRef != "5.5.1"
	}

	** Proportional rate of progress: compound from the final observed value
	replace ex_rp`yr' = final_value*(1 + rp)^(`yr' - final) if rp_type == "p" & ex_rp`yr' == .

	** Linear rate of progress (5.5.1 uses actual data with a linear step)
	replace ex_rp`yr' = final_value + (rp * (`yr' - final)) if ex_rp`yr' == . & (rp_type == "l" | IndicatorRef == "5.5.1")

	** Linear fit indicators: extend the fitted 2017 value with the regression slope
	replace ex_rp`yr' = predicted_y2017 + (rp * (`yr' - 2017)) if rp_type == "linear_fit" & IndicatorRef != "5.5.1" & ex_rp`yr' == .

	** Special case for Undernourishment: assume countries that hit 2.5% floor in 2015
	** (the year of the last available observation) actually reach zero
	replace ex_rp`yr' = 0 if n2015 <= 2.501 & rp !=. & IndicatorRef == "2.1.1"

	** Set floors and ceilings
	* Cannot exceed denominator (3.3.5 is sum of prevalence and can exceed 100%)
	replace ex_rp`yr' = 100 if ex_rp`yr' > 100 & SDG_end == 100 & ex_rp`yr' !=.
	replace ex_rp`yr' = 100 if ex_rp`yr' > 100 & den_type == 100 & ex_rp`yr' !=. & IndicatorRef != "3.3.5"
	replace ex_rp`yr' = 1000 if ex_rp`yr' > 1000 & den_type == 1000 & ex_rp`yr' !=.
	replace ex_rp`yr' = 1 if ex_rp`yr' > 1 & ex_rp`yr' != . & IndicatorRef == "5.1.1"

	* Gender parity in education: snap to 1 inside the 0.97-1.03 band, and once a
	* trajectory coming from below (above) the band has passed its near edge
	replace ex_rp`yr' = 1 if ex_rp`yr' > 0.97 & ex_rp`yr' < 1.03 & ex_rp`yr' != . & IndicatorRef == "4.5.1"
	replace ex_rp`yr' = 1 if ex_rp`yr' > 0.97 & final_value < 0.97 & ex_rp`yr' != . & final_value != . & IndicatorRef == "4.5.1"
	replace ex_rp`yr' = 1 if ex_rp`yr' < 1.03 & final_value > 1.03 & ex_rp`yr' != . & final_value != . & IndicatorRef == "4.5.1"

	* Set floor of zero
	replace ex_rp`yr' = 0 if ex_rp`yr' < 0

	* Keep a copy that is allowed to pass the SDG target, then cap ex_rp at the
	* target (1.1.1 is exempt from the cap on less-than targets)
	gen NonKinkedex_rp`yr' = ex_rp`yr'
	label variable NonKinkedex_rp`yr' "Value in `yr' on current trajectory, can surpass SDG target"
	replace ex_rp`yr' = SDG_end if ex_rp`yr' < SDG_end & SDG_end != . & ex_rp`yr' != . & inlist(target_type, "l", "2020_l") & IndicatorRef != "1.1.1"
	replace ex_rp`yr' = SDG_end if ex_rp`yr' > SDG_end & SDG_end != . & ex_rp`yr' != . & target_type == "g"
	label variable ex_rp`yr' "Value in `yr' on current trajectory using SDG target as floor/ceiling"
}

* Checkpoint: trajectories to 2030
save "output\BC_allsources_full_2030.dta", replace



**********************************************
* E. Establish on/off track in 2030 on current trajectory
**********************************************
 ** Q1. What is the distance covered on BAU?
 ** Q2. Has country achieved SDG: is 2015 value at SDG target 
 ** Q3. Is country on track: is trajectory value at SDG target
 ** Q4. Is country moving backwards & not on track?
 ** Q5. Does country require acceleration: is share of distance covered on BAU > 50% to target?
 ** Q6. Does country require a breakthrough: is share of distance covered on BAU <= 50% to target?
 ** Q7. Does country have levels data but is missing trajectory?
 ** Q8. Is country missing data?

use "output\BC_allsources_full_2030.dta", clear

* Q 1. What is the distance covered on BAU?
***********************
 ** Share of the 2015-to-target distance that the current trajectory covers by the end year
generate dist_covered = (ex_rp2030 - final2015) / (SDG_end - final2015) if inlist(target_type, "g", "l")
label variable dist_covered "Share of distance to the target covered on current trajectory by end year"
 * 2020 target: measure at 2020 instead of 2030
replace dist_covered = (ex_rp2020 - final2015) / (SDG_end - final2015) if target_type == "2020_l"

  * Special case: Gender parity
  * Outside the band, distance is measured to the nearer band edge
  * (0.97 from below, 1.03 from above); inside the band the share is 1
replace dist_covered = (ex_rp2030 - final2015) / (cond(final2015 < 0.97, 0.97, 1.03) - final2015) ///
	if IndicatorRef == "4.5.1" & (final2015 < 0.97 | final2015 > 1.03)
replace dist_covered = 1 if IndicatorRef=="4.5.1" & final2015 >= 0.97 & final2015 <= 1.03 & final2015!=.
 

 ** Categorize trajectories
 ***********************
  * pA. Achieved as of 2015
  * A. On-track
  * B. Acceleration needed: > 50% of distance to the target
  * C. Breakthrough needed: 0-50% of distance to the target
  * D. Moving backwards
  * E. No trajectory data
  * F. No data
  * Categories are assigned in the order below; each later step skips rows that
  * an earlier step has already classified, so statement order matters.
gen SDG_onoff = ""
 label variable SDG_onoff "pA: Achieved; A: on-track; B: Acceleration needed; C: Breakthrough needed; D: Moving backwards; E: No trajectory data; F: No data" 

** Less than targets
***********************
* Q 2a. Has country achieved SDG: is 2015 value at SDG target and country remains achieving by final year
replace SDG_onoff = "pA" if final2015 <= SDG_end & ex_rp2030 <= SDG_end & final2015 != . & SDG_end != . & target_type == "l" & !inlist(IndicatorRef, "3.3.1", "3.3.2", "3.3.3", "3.3.4", "3.3.5")
replace SDG_onoff = "pA" if final2015 <= SDG_end & ex_rp2020 <= SDG_end & final2015 != . & SDG_end != . & target_type == "2020_l"
  * Use exclusion threshold for infectious disease targets
  replace SDG_onoff = "pA" if final2015 <= SDGthresh_3_3 & final2015 != . & SDGthresh_3_3 != . & ex_rp2030 <= SDGthresh_3_3 & inlist(IndicatorRef, "3.3.1", "3.3.2", "3.3.3", "3.3.4", "3.3.5")

* Q 3a. Is country on track: is end value <= SDG_end
replace SDG_onoff = "A" if ex_rp2030 <= SDG_end & ex_rp2030 != . & SDG_end != . & target_type == "l" & SDG_onoff != "pA" 
replace SDG_onoff = "A" if ex_rp2020 <= SDG_end & ex_rp2020 != . & SDG_end != . & target_type == "2020_l" & SDG_onoff != "pA"

* Q 4a. Is country moving backwards & is not on track?
replace SDG_onoff = "D" if rp > 0 & rp != . & inlist(target_type, "l", "2020_l") & !inlist(SDG_onoff, "pA", "A")
  * 1.1.1 has a pre-calculated trajectory and no rp, so compare the 2030 value with the final value.
  * Missing values compare as larger than any number, so ex_rp2030 must be checked
  * explicitly; otherwise rows without a trajectory would be labelled D instead of
  * reaching the E / F steps at the end of this section.
replace SDG_onoff = "D" if ex_rp2030 > final_value & ex_rp2030 != . & IndicatorRef == "1.1.1" & !inlist(SDG_onoff, "pA", "A")


** Greater than targets
***********************
 ** 5.5.1 - women in government use actual data (not predicted)
 * From here on 5.5.1 is treated as a linear indicator (rp_type is changed in place)
 replace rp_type = "l" if IndicatorRef == "5.5.1"

* Q 2b. Has country achieved SDG: is 2015 value at SDG target 
replace SDG_onoff = "pA" if final2015 >= SDG_end & ex_rp2030 >= SDG_end & final2015 != . & SDG_end != . & ex_rp2030 != . & target_type == "g"

* Q 3b. Is country on track: is end value >= SDG_end
* NOTE(review): target_type "2020_g" is never assigned in the visible code, so the
* 2020_g conditions in Q 3b and Q 4b currently match no rows
replace SDG_onoff = "A" if ex_rp2030 >= SDG_end & ex_rp2030 != . & SDG_end != . & target_type == "g"  & SDG_onoff != "pA"
replace SDG_onoff = "A" if ex_rp2020 >= SDG_end & ex_rp2020 != . & SDG_end != . & target_type == "2020_g"  & SDG_onoff != "pA"

* Q 4b. Is country moving backwards & is not on track?
replace SDG_onoff = "D" if rp < 0 & rp!=. & inlist(target_type, "g", "2020_g") & !inlist(SDG_onoff, "pA", "A")

 
** Special case: Gender Parity in education (1 +/- .03)
***********************
* Q 2c. Has country achieved SDG: is 2015 value at SDG target 
 replace SDG_onoff = "pA" if abs(1 - final2015) <= .03 & final2015 != . & abs(1 - ex_rp2030) <= .03 & IndicatorRef == "4.5.1" 

* Q 3c. Is country on track? 
replace SDG_onoff = "A" if abs(1-ex_rp2030) <= .03 & ex_rp2030 != . & IndicatorRef == "4.5.1" & SDG_onoff != "pA"
  * Assume if 2030 trajectory crosses threshold at some point, then SDG met
   foreach num of numlist 2018(1)2030{
   replace SDG_onoff = "A" if abs(1-ex_rp`num') <= .03 & ex_rp2030 != . & IndicatorRef == "4.5.1" & SDG_onoff != "pA"
   } 
* Q 4c. Is country moving backwards & is not on track?
* Backwards means moving away from the band: falling while below 0.97, or rising while above 1.03
replace SDG_onoff = "D" if rp < 0 & final_value < 0.97 & rp != . & final_value != . & IndicatorRef == "4.5.1" & !inlist(SDG_onoff, "pA", "A")
replace SDG_onoff = "D" if rp > 0 & final_value > 1.03 & rp != . & final_value != . & IndicatorRef == "4.5.1" & !inlist(SDG_onoff, "pA", "A")


** All targets
***********************
* Q 5. Does country require acceleration: is share of distance covered > 50% to target?
replace SDG_onoff = "B" if dist_covered > .5 & dist_covered != . & !inlist(SDG_onoff, "pA", "A", "D")

* Q 6. Does country require a breakthrough: is share of distance covered <= 50% to target?
replace SDG_onoff = "C" if dist_covered <= .5 & dist_covered != . & !inlist(SDG_onoff, "pA", "A", "D", "B")

* Q 7. Does country have data but is missing trajectory?
replace SDG_onoff = "E" if count2012_2017 == 1 & SDG_onoff == "" 

* Q 8. Is country missing data?
replace SDG_onoff = "F" if count2012_2017 == . & SDG_onoff == ""
 
 
**********************************************
* F. Extrapolate trajectory required for country to reach SDG by end year
**********************************************
** Inputs used in this section
**   "SDG_end"     = end value required to meet target; created in section C
**   "final_value" = value in final year of rate of progress calculation; created in section B
**   "final"       = final year of rate of progress calculation; created in section B
**   "final2015"   = "2015" value identified in section C

* Rate of progress required to meet the target, from two starting points:
*   1) the 2015 value (used for incidence and mortality indicators)
*   2) the 2018 trajectory value (used for all)
generate SDG2015yes_rp = .
label variable SDG2015yes_rp "rate of progress required to reach SDG by end year starting at 2015"

generate SDGyes_rp = .
label variable SDGyes_rp "rate of progress required to reach SDG by end year starting at 2018"

 * Proportional rate of progress
   * 1. Starting at 2015 (3.6.1 ends in 2020; a zero start value makes the ratio
   *    undefined, so the required rate is set to 0)
replace SDG2015yes_rp = ((SDG_end / final2015)^(1/(2030-2015)))-1 if rp_type == "p" & ex_rp2030 != .
replace SDG2015yes_rp = ((SDG_end / final2015)^(1/(2020-2015)))-1 if rp_type == "p" & ex_rp2020 != . & IndicatorRef == "3.6.1"
replace SDG2015yes_rp = 0 if final2015 == 0 & rp_type=="p" & ex_rp2030!=.
   * 2. Starting at 2018 (same rules, from the 2018 trajectory value)
replace SDGyes_rp = ((SDG_end / ex_rp2018)^(1/(2030-2018)))-1 if rp_type == "p" & ex_rp2030 != .
replace SDGyes_rp = ((SDG_end / ex_rp2018)^(1/(2020-2018)))-1 if rp_type == "p" & ex_rp2020 != . & IndicatorRef == "3.6.1"
replace SDGyes_rp = 0 if ex_rp2018 == 0 & rp_type=="p" & ex_rp2030!=.

 * Linear rate of progress & linear fit indicators
   * 1. Starting at 2015 (only applicable for linear fit indicators)
replace SDG2015yes_rp = (SDG_end - final2015) / (2030 - 2015) if rp_type=="linear_fit" & ex_rp2030!=.
   * 2. Starting at 2018
replace SDGyes_rp = (SDG_end - ex_rp2018) / (2030 - 2018) if inlist(rp_type, "l", "linear_fit") & ex_rp2030!=.

 ** Special case: Gender parity in education
  * Aim for the nearer edge of the 0.97 - 1.03 band; zero if already at target ( = 1)
replace SDGyes_rp = (0.97 - ex_rp2018) / (2030 - 2018) if IndicatorRef == "4.5.1" & ex_rp2018 < 0.97 & ex_rp2018 != . & ex_rp2030 != .
replace SDGyes_rp = (1.03 - ex_rp2018) / (2030 - 2018) if IndicatorRef == "4.5.1" & ex_rp2018 > 1.03 & ex_rp2018 != . & ex_rp2030 != .
replace SDGyes_rp = 0 if ex_rp2018 == 1 & IndicatorRef == "4.5.1"


* Use required rate of progress to calculate trajectory to 2030
 * 1. Starting at 2015
forvalues yr = 2016/2030 {
	generate SDG2015yes_ex`yr' = .
	label variable SDG2015yes_ex`yr' "value in `yr' on trajectory required to reach SDG by end year starting at 2015"
}
 * 2. Starting at 2018
forvalues yr = 2018/2030 {
	generate SDGyes_ex`yr' = .
	label variable SDGyes_ex`yr' "value in `yr' on trajectory required to reach SDG by end year starting at 2018"
}

 * 1. Starting at 2015: proportional, then linear fit (the only linear case from 2015)
forvalues yr = 2016/2030 {
	replace SDG2015yes_ex`yr' = final2015*(1 + SDG2015yes_rp)^(`yr' - 2015) if rp_type == "p" & ex_rp2030 != .
	replace SDG2015yes_ex`yr' = final2015 + (SDG2015yes_rp * (`yr' - 2015)) if rp_type == "linear_fit" & ex_rp2030 != .
}
 * 2. Starting at 2018: proportional, then linear & linear fit
forvalues yr = 2018/2030 {
	replace SDGyes_ex`yr' = ex_rp2018*(1 + SDGyes_rp)^(`yr' - 2018) if rp_type == "p" & ex_rp2030 != .
	replace SDGyes_ex`yr' = ex_rp2018 + (SDGyes_rp * (`yr' - 2018)) if inlist(rp_type, "l", "linear_fit") & ex_rp2030 != .
}

* Due to slight differences in rounding, use SDG_end to fill 2030/2020 where appropriate
replace SDGyes_ex2030 = SDG_end if inlist(target_type, "g", "l")
replace SDG2015yes_ex2030 = SDG_end if inlist(target_type, "g", "l")
replace SDGyes_ex2020 = SDG_end if target_type == "2020_l"
replace SDG2015yes_ex2020 = SDG_end if target_type == "2020_l"

 ** Special case: Undernourishment (countries at the 2015 data floor are set to zero)
forvalues yr = 2018/2030 {
	replace SDGyes_ex`yr' = 0 if n2015 <= 2.501 & ex_rp2030 != . & IndicatorRef == "2.1.1"
}

 ** Special case: Extreme income poverty has pre-existing 2030 trajectories but missing BAU RP
  * For absolute number of people in poverty, use SDG target of 0% (differs from SDG achievement of 3%)
replace SDGyes_ex2030 = 0 if IndicatorRef == "1.1.1"

 ** Special case: gender parity in education (convert values between 0.97-1.03 to 1)
replace SDGyes_ex2030 = SDG_end if IndicatorRef == "4.5.1"


 ** Replace SDG yes trajectory with BAU trajectory if country is on track or already achieved target
forvalues yr = 2018/2030 {
	replace SDGyes_ex`yr' = ex_rp`yr' if inlist(SDG_onoff, "pA", "A") & IndicatorRef != "1.1.1"
}
forvalues yr = 2016/2030 {
	replace SDG2015yes_ex`yr' = ex_rp`yr' if inlist(SDG_onoff, "pA", "A") & IndicatorRef != "1.1.1"
}

** Create variable marking constant value from final2015
forvalues yr = 2016/2030 {
	generate const`yr' = final2015
	label variable const`yr' "Value in `yr' assuming constant value since 2015"
}
  

**********************************************
* G. Identify Gaps between BAU and SDG in 2015 and final target year
**********************************************

 * 1. Absolute gap in final value between SDG yes trajectory and BAU
* Gap at the end year: 2030 for g / l / special targets, 2020 for the 2020 target;
* rows already achieved or on track are then set to a gap of zero
generate endGap = .
replace endGap = abs(ex_rp2030 - SDG_end) if inlist(target_type, "l", "g", "special")
replace endGap = abs(ex_rp2020 - SDG_end) if target_type == "2020_l"
replace endGap = 0 if inlist(SDG_onoff, "pA", "A")
label variable endGap "Absolute gap between BAU and SDG yes"

 * 2. Absolute gap in 2015 between SDG yes trajectory and final2015
 *    (zero when the target was already achieved as of 2015)
generate finalGap = cond(SDG_onoff == "pA", 0, abs(final2015 - SDG_end))
label variable finalGap "Absolute gap between 2015 value and SDG yes"

* Checkpoint: trajectories, classification and gaps
save "output\BC_allsources_full_2030sdg.dta", replace




********************************************************************************
********************************************************************************
** 2. People left behind

	* A. Match indicator with relevant population category
	* B. Calculate lives affected in each year under various scenarios
	* C. Create variable of relevant population in SDG final year to use in graphs
********************************************************************************
********************************************************************************

* Forward slash is used as the path separator, as in the later save/use commands
use "output/BC_allsources_full_2030sdg.dta", clear

** Add population data (many indicator rows per CountryCode, one population row per CountryCode)
** NOTE(review): no keep() option is given, so observations that appear in only one
** of the two files are retained; confirm that this is intended.
merge m:1 CountryCode using "input/population_data.dta"
drop _merge

** For indicators that don't require population, create place holder (value = 1)
** NOTE(review): the placeholder exists for 2018-2030 only, and no population_type
** of "none" is assigned in the matching step that follows; verify whether it is used.
forvalues yr = 2018/2030 {
	gen none`yr' = 1
}


**********************************************
* A. Match indicator with relevant population category
**********************************************
* population_type stores the name stub of the population series used for each
* indicator (the year is appended to the stub in the lives calculations).
* Indicators not listed below keep population_type == "".
gen population_type = ""

** Whole population
replace population_type = "pop" if inlist(IndicatorRef, "1.2.1", "1.4.2", "2.1.1", "3.3.1", "3.3.2", "3.3.4", "3.3.5", "3.4.2")
replace population_type = "pop" if inlist(IndicatorRef, "3.6.1", "5.1.1", "6.1.1", "6.2.1", "7.1.1", "11.6.2", "16.1.1", "16.2.2")

** Whole population, poverty clock data
replace population_type = "pop_poverty" if IndicatorRef == "1.1.1"

** Age-based subsets (both sexes)
 * Under 5 years
replace population_type = "pop_under5" if inlist(IndicatorRef, "2.2.1", "2.2.2a", "16.9.1")
 * 2-4 years
replace population_type = "pop_2_4" if IndicatorRef == "2.2.2b"
 * Pre-primary (4)
replace population_type = "pop_preprimary4" if IndicatorRef == "4.2.2"
 * End primary (12)
replace population_type = "pop_primary12" if IndicatorRef == "4.1.1"
 * 5-17 years
replace population_type = "pop_5_17" if inlist(IndicatorRef, "4.5.1", "8.7.1")
 * 15-24 years
replace population_type = "pop_15_24" if IndicatorRef == "4.6.1"
 * Under 70 years
replace population_type = "pop_under70" if IndicatorRef == "3.4.1"

** Female subsets
 * Women 0-79
replace population_type = "fpop_0_79" if IndicatorRef == "5.5.1"
 * Women 15+
replace population_type = "fpop_15plus" if IndicatorRef == "5.2.1"
 * Women 15-49
replace population_type = "fpop_15_49" if inlist(IndicatorRef, "3.7.1", "5.3.2", "5.6.1")
 * Women 20-24
replace population_type = "fpop_20_24" if IndicatorRef == "5.3.1"

** Other denominators
 * Urban population
replace population_type = "urban_pop" if IndicatorRef == "11.1.1"
 * Births
replace population_type = "birth" if inlist(IndicatorRef, "3.1.1", "3.2.1")
 * Population at risk for malaria
replace population_type = "pop_malariarisk" if IndicatorRef == "3.3.3"

save "output/BC_livespre.dta", replace


**********************************************
* B. Calculate lives affected in each year under various scenarios
**********************************************
use "output/BC_livespre.dta", clear

* lives_calc == "no" marks indicators where the generic lives at stake calculation
* cannot be performed (or that require special calculation - 3.4.1); all others are "yes"
gen lives_calc = "yes"
 replace lives_calc = "no" if inlist(IndicatorRef, "3.3.5", "3.4.1", "4.5.1", "5.1.1", "10.1.1a")


* Scenarios (one variable per year, created empty and filled in below):
	* 1. Lives affected in each year under BAU
	* 2. Lives affected under SDG yes scenario (trajectory starting from 2018)
	* 3. Lives affected under SDG yes scenario (trajectory starting from 2015)
	* 4. Lives affected assuming constant 2015 value
forvalues yr = 2016/2030 {
	gen lives`yr' = .
	label variable lives`yr' "Lives affected in `yr' under BAU"
}
forvalues yr = 2018/2030 {
	gen SDGyes_lives`yr' = .
	label variable SDGyes_lives`yr' "Lives affected in `yr' under SDG yes scenario (starting from 2018)"
}
forvalues yr = 2016/2030 {
	gen SDG2015yes_lives`yr' = .
	label variable SDG2015yes_lives`yr' "Lives affected in `yr' under SDG yes scenario (starting from 2015)"
}
forvalues yr = 2016/2030 {
	gen const_lives`yr' = .
	label variable const_lives`yr' "Lives affected in `yr' assuming constant 2015 value"
}

* For every population category present in the data:
*   step 1: lives = population of that category x (indicator value / den_type)
*   step 2: for target_type "g" (indicators measuring positive), lives = population - lives
* The two steps are kept as separate statements so the stored intermediate value is unchanged.
* NOTE(review): only "g" is inverted in step 2; "2020_g" is recognised elsewhere in
* this file but not here. Confirm that no "2020_g" indicator needs the same adjustment.
levelsof population_type, local(pop_stubs)
foreach stub of local pop_stubs {
	forvalues yr = 2016/2030 {

		* 1. BAU
		replace lives`yr' = `stub'`yr' * (ex_rp`yr' / den_type) if lives_calc == "yes" & population_type == "`stub'"
		replace lives`yr' = `stub'`yr' - lives`yr' if lives_calc == "yes" & population_type == "`stub'" & target_type == "g"

		* 2. SDG yes starting from 2018 (series exists for 2018-2030 only)
		if `yr' >= 2018 {
			replace SDGyes_lives`yr' = `stub'`yr' * (SDGyes_ex`yr' / den_type) if lives_calc == "yes" & population_type == "`stub'"
			replace SDGyes_lives`yr' = `stub'`yr' - SDGyes_lives`yr' if lives_calc == "yes" & population_type == "`stub'" & target_type == "g"
		}

		* 3. SDG yes starting from 2015
		replace SDG2015yes_lives`yr' = `stub'`yr' * (SDG2015yes_ex`yr' / den_type) if lives_calc == "yes" & population_type == "`stub'"
		replace SDG2015yes_lives`yr' = `stub'`yr' - SDG2015yes_lives`yr' if lives_calc == "yes" & population_type == "`stub'" & target_type == "g"

		* 4. Constant 2015 value
		replace const_lives`yr' = `stub'`yr' * (const`yr' / den_type) if lives_calc == "yes" & population_type == "`stub'"
		replace const_lives`yr' = `stub'`yr' - const_lives`yr' if lives_calc == "yes" & population_type == "`stub'" & target_type == "g"
	}
}

	
 ** Special case: Non-communicable disease (3.4.1)
 ** Lives = total premature NCD deaths in the base year
 **         x probability of dying indexed to the base year
 **         x under-70 population indexed to the base year
 ** NOTE(review): n2015 and n2016 are defined earlier in the file; presumably the
 ** 2015 and 2016 indicator values - confirm.

  *1. Indexed probability of dying from NCD
   ** BAU trajectory (indexed to 2016)
   forvalues yr = 2016/2030 {
       gen NCD_BAUprob_index`yr' = ex_rp`yr' / n2016 if IndicatorRef == "3.4.1"
   }
   ** SDG yes trajectory starting from 2018 (indexed to 2015; built for 2019-2030)
   forvalues yr = 2019/2030 {
       gen NCD_SDGprob_index`yr' = SDGyes_ex`yr' / n2015 if IndicatorRef == "3.4.1"
   }
   ** SDG yes trajectory starting from 2015 (indexed to 2015)
   forvalues yr = 2016/2030 {
       gen NCD_SDG2015prob_index`yr' = SDG2015yes_ex`yr' / n2015 if IndicatorRef == "3.4.1"
   }
   ** Constant 2015 trajectory (indexed to 2015)
   forvalues yr = 2016/2030 {
       gen NCD_constprob_index`yr' = const`yr' / n2015 if IndicatorRef == "3.4.1"
   }

  *2. Under-70 population indexed to its 2015 value and to its 2016 value
   forvalues yr = 2016/2030 {
       gen pop_under702015index`yr' = pop_under70`yr' / pop_under702015 if IndicatorRef == "3.4.1"
   }
   forvalues yr = 2016/2030 {
       gen pop_under702016index`yr' = pop_under70`yr' / pop_under702016 if IndicatorRef == "3.4.1"
   }

  *3. Lives under each scenario
   forvalues yr = 2016/2030 {
       * A. BAU trajectory (indexed to 2016)
       replace lives`yr' = NCD_predeaths2016 * NCD_BAUprob_index`yr' * pop_under702016index`yr' if IndicatorRef == "3.4.1"
       * B. SDG yes starting from 2018 (indexed to 2015); 2018 itself is set after the loop
       if `yr' >= 2019 {
           replace SDGyes_lives`yr' = NCD_predeaths2015 * NCD_SDGprob_index`yr' * pop_under702015index`yr' if IndicatorRef == "3.4.1"
       }
       * C. SDG yes starting from 2015 (indexed to 2015)
       replace SDG2015yes_lives`yr' = NCD_predeaths2015 * NCD_SDG2015prob_index`yr' * pop_under702015index`yr' if IndicatorRef == "3.4.1"
       * D. Constant 2015 value (indexed to 2015)
       replace const_lives`yr' = NCD_predeaths2015 * NCD_constprob_index`yr' * pop_under702015index`yr' if IndicatorRef == "3.4.1"
   }
   * The SDG yes trajectory from 2018 starts at the BAU value for 2018
   replace SDGyes_lives2018 = lives2018 if IndicatorRef == "3.4.1"

  * 3.4.1 now has lives values, so flag it as calculated
  replace lives_calc = "yes" if IndicatorRef == "3.4.1"

  ** Special case: Extreme income poverty (1.1.1)
  * Due to asymptotic data form, lives calculations are total in poverty (not just those above 3%)
  ** Both SDG yes trajectories: set 2018-2029 to missing and 2030 to zero
  ** (SDG2015yes_lives2016 and SDG2015yes_lives2017 are left as calculated)
  foreach scen in SDGyes SDG2015yes {
      forvalues yr = 2018/2029 {
          replace `scen'_lives`yr' = . if IndicatorRef == "1.1.1"
      }
      replace `scen'_lives2030 = 0 if IndicatorRef == "1.1.1"
  }

** Due to slight rounding in calculations (-1 to -11 total lives) replace negative lives with 0
** Missing values never satisfy "< 0" in Stata, so they are left untouched
forvalues yr = 2016/2030 {
	* 1. BAU, 3. SDG yes starting from 2015, 4. Constant 2015 value
	foreach series in lives SDG2015yes_lives const_lives {
		replace `series'`yr' = 0 if lives_calc == "yes" & `series'`yr' < 0
	}
	* 2. SDG yes starting from 2018 (series exists for 2018-2030 only)
	if `yr' >= 2018 {
		replace SDGyes_lives`yr' = 0 if lives_calc == "yes" & SDGyes_lives`yr' < 0
	}
}

**********************************************
* C. Create variable of relevant population in SDG final year to use in graphs
**********************************************
gen rel_popfinal = .
gen rel_pop2015 = .
label variable rel_popfinal "relevant 2030 population (or final target year)"
label variable rel_pop2015 "relevant 2015 population"

* For each population category present in the data, copy that category's population:
*   rel_popfinal takes the 2020 value for 2020 target types and the 2030 value for
*   "l", "g" and "special"; rows with any other target_type are left missing
*   rel_pop2015 takes the 2015 value
levelsof population_type, local(pop_stubs)
foreach stub of local pop_stubs {
	replace rel_popfinal = cond(inlist(target_type, "2020_l", "2020_g"), `stub'2020, `stub'2030) ///
		if population_type == "`stub'" & inlist(target_type, "l", "g", "special", "2020_l", "2020_g")
	replace rel_pop2015 = `stub'2015 if population_type == "`stub'"
}

* Rows without a population category stay missing
foreach v of varlist rel_popfinal rel_pop2015 {
	replace `v' = . if population_type == ""
}

 ** Poverty population is missing for countries with missing data; replace with total pop
 ** (only the 2015 value is filled in; rel_popfinal is not changed here)
replace rel_pop2015 = pop2015 if IndicatorRef == "1.1.1" & rel_pop2015 == .

 ** adjust women in government rate of progress back to linear fit
replace rp_type = "linear_fit" if IndicatorRef == "5.5.1"

save "output/BC_allsources_livesstake.dta", replace
