* Session setup: turn off output paging, empty the data in memory, and move to
* the project folder so the relative "output/" and "input/" paths used below resolve.
set more off
clear
cd "V:\krasmussen\M\Data\SDG trajectories\data\stata\Final dofiles\For publication\"


********************************************************************************
********************************************************************************
** HOW MANY PEOPLE WILL THE WORLD LEAVE BEHIND?
** Assessing current trajectories on the Sustainable Development Goals

* Graphs and tables

 * I. Classify targets and standardize country names for tables and figures
 * II. Indicator decision tree (Appendix)
 * III. Which issues face the biggest challenges?
	* Figures 4 & 5 World performance on SDG targets by 2030 under BAU
	* Figures 6 & 7 Lives and needs at stake, 2018-2030
	* Appendix 1 & 2 Number of countries on and off track

 * IV. Which countries face the biggest challenges?
	* Figure 10 The most off-track countries' progress by 2030 under BAU
	* Figure 11 OECD countries' progress by 2030 under BAU
	* Appendix 3 Absolute targets: Avg. share of distance to the frontier 
	* Appendix 4 Absolute targets: SDG status by indicator
	* Appendix 5 Relative targets: SDG status by indicator
	* Figures 8 & 9 Share of lives (and needs) at stake on each indicator (top 5 countries)
	
* Homi Kharas * John W. McArthur * Krista Rasmussen
* September 12, 2018
********************************************************************************
********************************************************************************


**********************************************
** I. Classify targets and standardize country names for tables and figures
**********************************************

use "output/BC_allsources_livesstake.dta", clear

* Indicator 10.1.1b is removed before any classification is done
drop if IndicatorRef == "10.1.1b"

* Break the dotted reference into its goal / target / indicator components.
* Letter suffixes become decimals ("a" -> ".1", "b" -> ".2") so that all three
* components can be converted to numbers and used for sorting.
split IndicatorRef, parse(".")
rename (IndicatorRef1 IndicatorRef2 IndicatorRef3) (goal target indicator)
replace indicator = subinstr(subinstr(indicator, "a", ".1", .), "b", ".2", .)
destring goal target indicator, replace

* End year of the target: 2030 for target types "l", "g", "special" and for
* indicator 2.3; 2020 for target type "2020_l" (applied last, so it wins)
gen t_endyear = 2030 if inlist(target_type, "l", "g", "special") | IndicatorRef == "2.3"
replace t_endyear = 2020 if target_type == "2020_l"
label variable t_endyear "Targets categorized by end year"

* Absolute vs. relative targets: everything is "absolute" unless listed here
gen t_categories = "absolute"
foreach ref in "1.2.1" "2.3" "3.3.1" "3.3.2" "3.3.3" "3.3.4" "3.3.5" "3.4.1" "3.4.2" "3.6.1" "10.1.1a" "11.6.2" "16.1.1" {
	replace t_categories = "relative" if IndicatorRef == "`ref'"
}
label variable t_categories "Indicators with absolute end target or relative"

* One spelling of CountryName per country code (sources differ), listed by code
replace CountryName = "Brunei Darussalam"         if CountryCode == "BRN"
replace CountryName = "Cote d'Ivoire"             if CountryCode == "CIV"
replace CountryName = "Congo, Rep."               if CountryCode == "COG"
replace CountryName = "Cabo Verde"                if CountryCode == "CPV"
replace CountryName = "Egypt, Arab Rep."          if CountryCode == "EGY"
replace CountryName = "Micronesia, Fed. Sts."     if CountryCode == "FSM"
replace CountryName = "Iran, Islamic Rep."        if CountryCode == "IRN"
replace CountryName = "Korea, Rep."               if CountryCode == "KOR"
replace CountryName = "Macedonia, FYR"            if CountryCode == "MKD"
replace CountryName = "Korea, Dem. People's Rep." if CountryCode == "PRK"
replace CountryName = "Russian Federation"        if CountryCode == "RUS"
replace CountryName = "Sao Tome and Principe"     if CountryCode == "STP"
replace CountryName = "Venezuela, RB"             if CountryCode == "VEN"
replace CountryName = "Yemen, Rep."               if CountryCode == "YEM"

* Per indicator, count the rows that have a 2030 trajectory value (ex_rp2030).
* egen only fills the rows that satisfy the condition; sorting puts those rows
* first within the indicator so carryforward can copy the count to the rest.
gen count = 1
bysort IndicatorRef: egen traj_data = sum(count) if ex_rp2030 != .
sort IndicatorRef traj_data
by IndicatorRef: carryforward traj_data, replace
label variable traj_data "Number of countries with trajectory data"

save "output/BC_allsources_livesstakeCAT.dta", replace


**********************************************
** II. Indicator decision tree (Appendix)
**********************************************
use "output/BC_allsources_livesstakeCAT.dta", clear

* ---- Share of the relevant population covered by trend data ----

* Denominator: relevant final-year population summed over all rows of the
* indicator (skipped when the indicator has population_type "none")
sort IndicatorRef
by IndicatorRef: egen finalpop_den = sum(rel_popfinal) if population_type != "none"
label variable finalpop_den "Population in final year in 193 countries"

* Numerator: same sum, restricted to rows with a 2030 trajectory value.
* Only those rows receive the total, so it is sorted to the top of each
* indicator and carried forward to the remaining rows.
by IndicatorRef: egen trendpop_sum = sum(rel_popfinal) if ex_rp2030 != . & population_type != "none"
label variable trendpop_sum "Population in final year in countries with trend data"
gsort IndicatorRef -trendpop_sum
by IndicatorRef: carryforward trendpop_sum, replace

gen sh_pop_trend = trendpop_sum / finalpop_den

* Express both population totals in millions for the table
foreach popvar of varlist trendpop_sum finalpop_den {
	replace `popvar' = `popvar' / 1000000
}

keep CountryCode goal target indicator sh_pop_trend traj_data trendpop_sum finalpop_den ///
	IndicatorRef indicatorname source t_* rp_type SDG_end SDG_onoff ex_rp2025 target_type ///
	lives_calc population_type den_type final final_value count2012_2017 initial initial_value

* ---- Unweighted mean / max / min per indicator ----
* Each pair is "<suffix of the new variables> <source variable>":
*   final, initial     -> final and initial values
*   finalyr, initialyr -> final and initial years
foreach pair in "final final_value" "initial initial_value" "finalyr final" "initialyr initial" {
	local suffix : word 1 of `pair'
	local source : word 2 of `pair'
	bysort IndicatorRef: egen avg_`suffix' = mean(`source')
	bysort IndicatorRef: egen max_`suffix' = max(`source')
	bysort IndicatorRef: egen min_`suffix' = min(`source')
}

* Total of count2012_2017 per indicator
bysort IndicatorRef: egen total_data = sum(count2012_2017)

* Fill blank indicator names from a non-blank row of the same indicator
gsort IndicatorRef -indicatorname
by IndicatorRef: carryforward indicatorname, replace

* ---- Reduce to one row per indicator ----
sort goal target indicator
keep IndicatorRef indicatorname source sh_pop_trend trendpop_sum finalpop_den traj_data ///
	rp_type SDG_end target_type lives_calc population_type den_type t_* total_data ///
	avg_final max_final min_final avg_finalyr max_finalyr min_finalyr ///
	avg_initialyr max_initialyr min_initialyr avg_initial max_initial min_initial ///
	goal target indicator

duplicates drop IndicatorRef indicatorname sh_pop_trend trendpop_sum finalpop_den source ///
	rp_type target_type lives_calc population_type den_type total_data ///
	avg_initialyr max_initialyr min_initialyr avg_initial max_initial min_initial, force
save "output/indicator_table.dta", replace

* In the exported sheet only (the .dta above is already saved), SDG_end is
* replaced by the placeholder -99999 for relative targets
replace SDG_end = -99999 if t_categories == "relative"

order IndicatorRef indicatorname source rp_type SDG_end target_type population_type den_type ///
	lives_calc t_* total_data traj_data trendpop_sum finalpop_den sh_pop_trend ///
	avg_final max_final min_final avg_initial max_initial min_initial ///
	avg_finalyr max_finalyr min_finalyr avg_initialyr max_initialyr min_initialyr
export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("ApdxIndicators_table") 



**********************************************************************
**********************************************************************
** III. Which issues face the biggest challenges?
	* Figures 4 & 5 World performance on SDG targets by 2030 under BAU
	* Figures 6 & 7 Lives and needs at stake, 2018-2030
	* Appendix figures 1 & 2 Number of countries on and off track
**********************************************************************
**********************************************************************
 
**********************************************
*** * Figures 4 & 5 World performance on SDG targets by 2030 under BAU
	* Figures 6 & 7 Lives and needs at stake, 2018-2030
**********************************************
use "output/BC_allsources_livesstakeCAT.dta", clear

 ** Keep only indicators with trajectory data for at least 100 countries
keep if traj_data >= 100 & traj_data != .

 ** For extreme income poverty (1.1.1), rows flagged "pA" are recoded to "A"
 ** so that all countries are included (even if they had <3% as of 2015)
replace SDG_onoff = "A" if IndicatorRef == "1.1.1" & SDG_onoff == "pA"


* 1. Indicators requiring cumulative lives calculations
****************************************************
  * Put the yearly variables in sequential order so that each
  * "first - last" variable range below spans consecutive years
  order SDGyes_lives* SDG2015yes_lives* const_lives* lives*, sequential

  * Indicators whose lives are summed through 2030; traffic deaths (3.6.1)
  * are summed through 2020 instead
  local cum2030 `"inlist(IndicatorRef, "3.1.1", "3.2.1", "3.3.1", "3.3.2", "3.3.3", "3.3.4", "3.4.1", "3.4.2", "16.1.1")"'

  * Each spec is "<stub of new variables> <stub of yearly variables> <first year summed>":
  *   A. starting from 2018: SDG achievement trajectory, BAU trajectory
  *   B. starting from 2016: SDG achievement (2015 base), constant 2015 value, BAU
  * For every spec two row totals are created, <stub>_YY30 and <stub>_YY20
  * (YY = last two digits of the first year). Rows with SDG_onoff "pA" are
  * skipped, and the "missing" option leaves the total missing when every
  * yearly value is missing.
  foreach spec in "SDG_lives SDGyes_lives 2018" "lives lives 2018" "SDG2015_lives SDG2015yes_lives 2016" "const_lives const_lives 2016" "lives lives 2016" {
	local newstub : word 1 of `spec'
	local srcstub : word 2 of `spec'
	local startyr : word 3 of `spec'
	local yy = substr("`startyr'", 3, 2)

	egen `newstub'_`yy'30 = rowtotal(`srcstub'`startyr' - `srcstub'2030) if SDG_onoff != "pA" & `cum2030', missing
	egen `newstub'_`yy'20 = rowtotal(`srcstub'`startyr' - `srcstub'2020) if SDG_onoff != "pA" & IndicatorRef == "3.6.1", missing
  }
  


* 2. Combine cumulative indicators with those examining lives only in 2030
**************************************************** 
 * Every combined variable starts from the 2030-only value (missing for rows
 * with SDG_onoff "pA") and is then overwritten with the cumulative total for
 * the indicators in `cum2030' and for traffic deaths (3.6.1, summed to 2020).

 * Indicators that use cumulative totals through 2030
 local cum2030 `"inlist(IndicatorRef, "3.1.1", "3.2.1", "3.3.1", "3.3.2", "3.3.3", "3.3.4", "3.4.1", "3.4.2", "16.1.1")"'

 ** A. Lives under SDG achievement trajectory
 
  ** 1. Trajectories starting from 2018
  gen SDG_lives = SDGyes_lives2030 if t_endyear == 2030 & SDG_onoff != "pA" 
   * Add cumulative lives for relevant indicators
  replace SDG_lives = SDG_lives_1830 if `cum2030'
  replace SDG_lives = SDG_lives_1820 if IndicatorRef == "3.6.1"
  label variable SDG_lives "Lives affected if SDG met (trajectory from 2018)"  

  ** 2. Trajectories starting from 2015
   * NOTE(review): the 2030-only value is taken from SDGyes_lives2030 (the
   * 2018-based series), not SDG2015yes_lives2030 - confirm this is intended.
  gen SDG2015_lives = SDGyes_lives2030 if t_endyear == 2030 & SDG_onoff != "pA" 
   * Add cumulative lives for relevant indicators
  replace SDG2015_lives = SDG2015_lives_1630 if `cum2030'
  replace SDG2015_lives = SDG2015_lives_1620 if IndicatorRef == "3.6.1"
  label variable SDG2015_lives "Lives affected if SDG met (trajectory from 2015)"  

  
 ** B. Lives under BAU trajectory
 
  ** 1. Trajectories starting from 2018
  gen BAU_lives = lives2030 if SDG_onoff != "pA"
   * Add cumulative lives for relevant indicators
  replace BAU_lives = lives_1830 if `cum2030'
  replace BAU_lives = lives_1820 if IndicatorRef == "3.6.1"
  label variable BAU_lives "Lives affected on BAU trajectory"

  ** 2. Trajectories starting from 2015
  gen BAU2015_lives = lives2030 if SDG_onoff != "pA"
   * Add cumulative lives for relevant indicators
  replace BAU2015_lives = lives_1630 if `cum2030'
  replace BAU2015_lives = lives_1620 if IndicatorRef == "3.6.1" 
   * Label the 2015-based variable itself (previously BAU_lives was labelled a
   * second time here and BAU2015_lives was left without a label)
  label variable BAU2015_lives "Lives affected on BAU trajectory (trajectory from 2015)" 

  
 ** C. Lives under constant trajectory
 
  ** 1. Trajectories starting from 2015
  gen const2015_lives = const_lives2030 if SDG_onoff != "pA"
   * Add cumulative lives for relevant indicators
  replace const2015_lives = const_lives_1630 if `cum2030'
  replace const2015_lives = const_lives_1620 if IndicatorRef == "3.6.1"
  label variable const2015_lives "Lives affected if 2015 value remained constant"

  
  
* 3. Calculate lives at stake and share of people on track to achieve target
**************************************************** 

 ** A. Lives at stake = BAU trajectory minus SDG achievement trajectory
 **    (cumulative indicators use 2018 - 2030)
 gen dif_BAUSDG_lives = BAU_lives - SDG_lives
 * Rounding leaves tiny non-zero differences (e.g. -.00005 lives) for rows with
 * SDG_onoff "A"; set them to exactly zero, except for indicator 1.1.1
 replace dif_BAUSDG_lives = 0 if SDG_onoff == "A" & IndicatorRef != "1.1.1" & dif_BAUSDG_lives != .

 ** B. Distance traveled = constant-2015 trajectory minus BAU trajectory
 **    (cumulative indicators use 2015 - 2030)
 gen dif_constBAU2015_lives = const2015_lives - BAU2015_lives

 ** C. SDG promise = constant-2015 trajectory minus SDG achievement trajectory,
 **    computed only where the distance traveled (B) is available
 gen dif_constSDG2015_lives = const2015_lives - SDG2015_lives if dif_constBAU2015_lives != .

 label variable dif_BAUSDG_lives "Lives at stake: Dif. between BAU and SDG trajectories"
 label variable dif_constBAU2015_lives "Dif. between constant 2015 value and BAU in end year lives (using 2015 trajectory)"
 label variable dif_constSDG2015_lives "Dif. between constant 2015 value and SDG yes in end year lives (using 2015 trajectory)"

 ** D. Indicator-level totals of A, B and C (same value on every row of the indicator)
 foreach gap in BAUSDG constBAU2015 constSDG2015 {
	bysort IndicatorRef: egen sum_`gap'_lives = sum(dif_`gap'_lives)
 }
 label variable sum_BAUSDG_lives "Sum of lives at stake"
 label variable sum_constBAU2015_lives "Sum of distance traveled in lives (using 2015 trajectory)"
 label variable sum_constSDG2015_lives "Sum of SDG promise in lives (using 2015 trajectory)" 

 ** E. Share of the promise on track to be fulfilled by 2030, and its complement
gen distance_traveled = (sum_constBAU2015_lives / sum_constSDG2015_lives)
gen sh_rem_prob = 1 - (sum_constBAU2015_lives / sum_constSDG2015_lives)
 label variable distance_traveled "Share of promise on track to be fullfilled by 2030"
 label variable sh_rem_prob "Share of remaining problem"

save "output/which_issues_precollapse_all.dta", replace
 

 ** F. Build the indicator-level file used by the graphs
 * Drop rows without a distance-traveled value (original note: keeps only
 * countries with trajectory data that hadn't met the SDG as of 2015)
drop if dif_constBAU2015_lives == .

 * One row per indicator; the kept variables are taken from the first row of each
collapse (first) goal target indicator den_type SDG_end target_type distance_traveled sh_rem_prob sum_BAUSDG_lives sum_constBAU2015_lives sum_constSDG2015_lives, by(IndicatorRef)
format sum* %11.0f
save "output/which_issues.dta", replace

 * Attach the absolute/relative category; only indicators found in both files are kept
use "output/which_issues.dta", clear
merge 1:1 IndicatorRef using  "output\indicator_table.dta", keepusing(t_categories) keep(match) nogenerate

 * Attach short names; short-name rows with no matching indicator are discarded
merge 1:1 IndicatorRef using "input\indicator_shortnames.dta", keepusing(numindicator_short indicator_shortnum) keep(master match) nogenerate

 * Group label: the listed indicators are "1life death", the rest "2quality"
 * (rows whose category is "other" would be left blank)
gen LD = "1life death" if inlist(IndicatorRef, "3.1.1", "3.2.1", "3.4.1", "3.4.2", "3.6.1", "16.1.1")
replace LD = "2quality" if LD == "" & t_categories != "other"
gsort t_categories -goal -target -indicator

drop IndicatorRef

save "output/which_issues_indicatorlvl.dta", replace

** Figures 4 & 5 World performance on SDG targets by 2030 under BAU:
** uses "distance_traveled" and "sh_rem_prob"
 * Built inside preserve/restore so the full indicator-level data is available
 * again for the next sheet
preserve
keep numindicator_short distance_traveled sh_rem_prob LD sum_constSDG2015_lives 
order numindicator_short distance_traveled sh_rem_prob LD sum_constSDG2015_lives 
gsort -LD -distance_traveled
export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("F4_5_disttraveled_pct") 
restore

** Figures 6 & 7 Lives and needs at stake, 2018-2030: uses "sum_BAUSDG_lives"
keep indicator_shortnum sum_BAUSDG_lives LD
gsort -LD -sum_BAUSDG_lives
order indicator_shortnum
export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("F6_7_livesstake") 



**********************************************
** Appendix 1 & 2: Number of countries on and off track
**********************************************
use "output/BC_allsources_livesstakeCAT.dta", clear

 ** Drop indicators with <100 countries with trajectory data
drop if traj_data < 100 | traj_data == .

keep CountryName IndicatorRef target_type t_categories SDG_onoff SDG_end final2015 rp traj_data

 ** Classify indicators as life and death ("1life death") or other ("2BD")
gen LD = "1life death" if inlist(IndicatorRef, "3.1.1", "3.2.1", "3.4.1", "3.4.2", "3.6.1", "16.1.1")
replace LD = "2BD" if LD == ""

 ** Count rows by indicator and SDG achievement category, then spread the
 ** categories into columns (countpA, countA, countB, ...)
gen count = 1
sort IndicatorRef SDG_onoff
collapse (sum) count (first) LD, by(IndicatorRef SDG_onoff)

reshape wide count, i(IndicatorRef) j(SDG_onoff, string)

order IndicatorRef countpA
 * A category that does not occur for an indicator is missing after the reshape; show it as 0
foreach var of varlist count* {
	replace `var' = 0 if `var' == .
}

 ** Add indicator short names (short names with no matching indicator are discarded)
merge 1:1 IndicatorRef using "input\indicator_shortnames.dta", keepusing(indicator_shortnum)
drop if _merge == 2
drop _merge IndicatorRef
order indicator_shortnum 

 ** Create variables to allow for sorting by most number of achieved/on track
gen pa_A = countpA + countA
 * traj_yes flags indicators with at least one country in categories pA-D.
 * The flag is 1 for any positive total; the earlier "> 1" test left
 * indicators with a total of exactly 1 with a missing flag.
local n_traj countpA + countA + countB + countC + countD
gen traj_yes = cond(`n_traj' > 0, 1, 0) if !missing(`n_traj')
gsort LD -traj_yes -pa_A -countpA -countA -countB -countC -countD -countE

export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("Apdx1_2_SDGonoff") 



**********************************************************************
**********************************************************************
 * IV. Which countries face the biggest challenges?
	* Figure 10 The most off-track countries' progress by 2030 under BAU
	* Figure 11 OECD countries' progress by 2030 under BAU
	* Appendix 3 Absolute targets: Avg. share of distance to the frontier 
	* Appendix 4 Absolute targets: SDG status by indicator
	* Appendix 5 Relative targets: SDG status by indicator
	* Figures 8 & 9 Share of lives (and needs) at stake on each indicator (top 5 countries)
**********************************************************************
**********************************************************************


**********************************************
** Figures 10 & 11 Average distance traversed towards SDG in 2015 and 2030
**********************************************

**********************************************
 * Distance to the frontier (for absolute targets)
 ** Use absolute distance to the target in 2030 to calculate
 ** 0 = frontier
 ** worst is 5th worst percentile in 2015
 
 * Share of distance remaining (for relative targets)
**********************************************
use "output/BC_allsources_livesstakeCAT.dta", clear

 * Keep only indicators with trajectory data for at least 100 countries
keep if traj_data >= 100 & traj_data != .

 * 1. By indicator, identify the 5th worst percentile of the 2015 gap
 * Gap variables are renamed so their year is explicit
rename (endGap finalGap) (end2030Gap final2015Gap)

 * Taken as the 95th percentile of final2015Gap within each indicator
egen pct5_finalGap = pctile(final2015Gap), p(95) by(IndicatorRef) 
 label variable pct5_finalGap "5th worst percentile of absolute distance to target in 2015"


 * 2. Distance traversed to the frontier (absolute targets only)
 * share = (5th-worst gap - country gap) / (5th-worst gap - 0), where 0 is the frontier.
 * Computed once with the 2030 gap and once with the 2015 gap; negative
 * results (country beyond the 5th worst percentile) are set to 0.
foreach pair in "dist2front end2030Gap" "dist2front2015 final2015Gap" {
	local share : word 1 of `pair'
	local gap   : word 2 of `pair'
	gen `share' = (pct5_finalGap - `gap') / (pct5_finalGap - 0) if t_categories == "absolute" & pct5_finalGap != . & `gap' != .
	replace `share' = 0 if `share' < 0 & `share' != . & t_categories == "absolute"
}
 label variable dist2front "Distance to the frontier in 2030"
 label variable dist2front2015 "Distance to the frontier in 2015" 

 * Share of distance for relative targets:
 * (2015 value - projected value) / (2015 value - SDG end value)
 * Rows must have the 2015 value, the 2015 gap and a 2030 projection
local has_vals final2015 != . & final2015Gap != . & ex_rp2030 != .
gen sh_distrem = (final2015 - ex_rp2030) / (final2015 - SDG_end) if t_categories == "relative" & !inlist(SDG_onoff, "A", "pA") & target_type != "2020_l" & `has_vals'
 * Targets ending in 2020 use the 2020 projection instead
 * NOTE(review): the guard still tests ex_rp2030 rather than ex_rp2020 - confirm intended
 replace sh_distrem = (final2015 - ex_rp2020) / (final2015 - SDG_end) if t_categories == "relative"  & target_type == "2020_l" & `has_vals'
 * Rows with SDG_onoff "A" or "pA" are set to 1; negative shares are set to 0
 replace sh_distrem = 1 if inlist(SDG_onoff, "A", "pA") & t_categories == "relative"
 replace sh_distrem = 0 if sh_distrem < 0 & sh_distrem != . & t_categories == "relative"
 label variable sh_distrem "Share of distance remaining in 2030"


 * 3. Country averages of each measure across indicators
foreach measure in dist2front dist2front2015 sh_distrem {
	egen tmean_`measure' = mean(`measure'), by(CountryName)
}
 label variable tmean_dist2front "Avg. distance in 2030 to frontier by indicator type"
 label variable tmean_dist2front2015 "Avg. distance in 2015 to frontier by indicator type"
 label variable tmean_sh_distrem "Avg. share of distance remaining in 2030 by indicator type"

* Add indicator short names (short names with no matching indicator are discarded)
merge m:1 IndicatorRef using "input\indicator_shortnames.dta", keepusing(numindicator_short)
drop if _merge == 2
drop _merge 
 

 * 4. Adjust for graphs
keep CountryName numindicator_short end2030Gap final2015Gap dist2front dist2front2015 sh_distrem pct5_finalGap tmean_* t_* SDG_onoff CountryCode traj_data

 * By country and type of indicator, count number of indicators with trend data.
 * egen fills only the rows that meet the condition; those rows are sorted to
 * the top of each country and the count is carried forward to the other rows.
 * A country with no qualifying indicator keeps a missing count.
 gen count = 1
 bysort CountryName: egen cnt_ab = sum(count) if t_categories=="absolute" & dist2front != .
  gsort CountryName -cnt_ab
  by CountryName: carryforward cnt_ab, replace
 bysort CountryName: egen cnt_rel = sum(count) if t_categories=="relative" & sh_distrem != .
  gsort CountryName -cnt_rel
  by CountryName: carryforward cnt_rel, replace
  
  * Drop country from ranking tables if it has data for <5 relative indicators or <9 absolute.
  * A missing count means zero indicators with data. Stata treats missing as
  * larger than any number, so it must be tested explicitly or those
  * countries would pass the "<" thresholds and stay in the tables.
  * NOTE(review): an earlier comment gave the relative threshold as <4; the
  * code has always used <5 - confirm which is intended.
drop if t_categories == "relative" & (cnt_rel < 5 | missing(cnt_rel))
drop if t_categories == "absolute" & (cnt_ab < 9 | missing(cnt_ab))

save "output/avg_dist2front.dta", replace

drop SDG_onoff CountryCode
 * Spread the country averages into one set of columns per target category
reshape wide tmean_dist2front tmean_sh_distrem tmean_dist2front2015, i(CountryName numindicator_short) j(t_categories, string)

 * By country, carry each average forward across all indicator rows, so that
 * every row of a country holds the same values before de-duplication
foreach var of varlist tmean* {
	gsort CountryName -`var'
	by CountryName: carryforward `var', replace
}

 * One row per country
keep CountryName tmean* 
duplicates drop
save "output/avg_dist2front_output.dta", replace


**** All countries
** Figure 10a Average distance traversed towards SDG in 2015 and 2030 - absolute targets
 * Keeps the 2030 and 2015 country averages for absolute targets, sorted by the 2030 average
use "output/avg_dist2front_output.dta", clear
keep CountryName tmean_dist2frontabsolute tmean_dist2front2015absolute
gsort tmean_dist2frontabsolute
gen graph_value = 1
export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("F10a_Countries_avgdist_ab")

** Figure 10b Average distance traversed towards SDG in 2015 and 2030 - relative targets
 * Rows are sorted by the absolute-target average (dropped once the sort is
 * done), which matches the sort key of Figure 10a; the 2015 column is a constant 0
use "output/avg_dist2front_output.dta", clear
keep CountryName tmean_sh_distremrelative tmean_dist2frontabsolute
gsort tmean_dist2frontabsolute
drop tmean_dist2frontabsolute
gen dist_2015 = 0
gen graph_value = 1
export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("F10b_Countries_avgdist_rel")


**** OECD countries
** Figure 11a Average distance traversed towards SDG in 2015 and 2030 - absolute targets 
 * Same layout as Figure 10a, restricted to rows flagged oecd == 1 in the OECD list
use "output/avg_dist2front_output.dta", clear
merge 1:1 CountryName using "input\OECD_countries.dta", keepusing(dac oecd) nogenerate
keep if oecd == 1
keep CountryName tmean_dist2frontabsolute tmean_dist2front2015absolute dac oecd
gsort tmean_dist2frontabsolute
gen graph_value = 1
export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("F11a_Countries_avgOECD_abs")

** Figure 11b Average distance traversed towards SDG in 2015 and 2030 - relative targets 
 * Same layout as Figure 10b, restricted to rows flagged oecd == 1
use "output/avg_dist2front_output.dta", clear
merge 1:1 CountryName using "input\OECD_countries.dta", keepusing(dac oecd) nogenerate
keep if oecd == 1
keep CountryName tmean_sh_distremrelative tmean_dist2frontabsolute dac oecd
gsort tmean_dist2frontabsolute
drop tmean_dist2frontabsolute
gen dist_2015 = 0
gen graph_value = 1
order CountryName tmean* dist_2015
export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("F11b_Countries_avgOECD_rel")



**********************************************
** Appendix 3 Absolute targets: Avg. share of distance to the frontier 
** Appendix 4 Absolute targets: SDG status by indicator
** Appendix 5 Relative targets: SDG status by indicator
**********************************************
** Appendix 3 Absolute targets: Avg. share of distance to the frontier 
 * One row per country with its 2015 and 2030 averages plus one "df" column
 * per indicator, all expressed in percent
use "output/avg_dist2front.dta", clear
keep if t_categories == "absolute"
keep CountryName numindicator_short dist2front tmean_dist2front tmean_dist2front2015
 * Short names become part of variable names after the reshape, so "." is replaced
replace numindicator_short = subinstr(numindicator_short, ".", "__", .)
rename dist2front df
reshape wide df, i(CountryName) j(numindicator_short, string)
gsort tmean_dist2front
order CountryName tmean_dist2front2015* tmean* df*

foreach var of varlist tmean_dist2front2015 tmean_dist2front df* {
	replace `var' = 100 * `var'
}

save "output/avg_dist2front_wide.dta", replace
gsort tmean_dist2front
export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("Apdx3_Countries_distind_ab")


** Appendix 4 Absolute targets: SDG status by indicator
** Appendix 5 Relative targets: SDG status by indicator
 * Both sheets have the same layout: one row per country, the country average
 * in percent, and one SDG status column per indicator.
 * Each spec is "<target category> <average variable> <sheet name>".
foreach spec in "absolute tmean_dist2front Apdx4_Countries_SDG_abs" "relative tmean_sh_distrem Apdx5_Countries_distind_rel" {
	local category : word 1 of `spec'
	local avgvar   : word 2 of `spec'
	local sheet    : word 3 of `spec'

	use "output/avg_dist2front.dta", clear
	keep if t_categories == "`category'"
	keep CountryName numindicator_short `avgvar' SDG_onoff
	replace numindicator_short = subinstr(numindicator_short, ".", "__", .)
	rename SDG_onoff SDG
	reshape wide SDG, i(CountryName) j(numindicator_short, string)
	gsort `avgvar'
	order SDG*, sequential
	order CountryName `avgvar' SDG*
	replace `avgvar' = `avgvar' * 100
	export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("`sheet'")
}




**********************************************
* Figures 8 & 9 Share of lives (and needs) at stake on each indicator (top 5 countries)
**********************************************
use "output/which_issues_precollapse_all.dta", clear

 * Keep only indicators with trajectory data for at least 100 countries
keep if traj_data >= 100 & traj_data != .

 * 1. Country's lives at stake (BAU minus SDG trajectory) as a share of the indicator total
gen pct_LB = dif_BAUSDG_lives / sum_BAUSDG_lives
 label variable pct_LB "Share of people left behind on BAU in each country"

 * 2. Rank countries within each indicator, largest share first.
 * A share of exactly zero gets rank 193 instead of its position
 * (presumably last place among 193 countries - confirm); rows without a
 * share get no rank.
gsort IndicatorRef -pct_LB
by IndicatorRef: gen rank_pct_LB = cond(pct_LB == 0, 193, _n) if pct_LB != .

 * 3. Running total of the shares within each indicator, in the sorted order
by IndicatorRef: gen cum_pct_LB = sum(pct_LB)

 * 4. Cumulative share accounted for by the five highest-ranked countries.
 * Only rows ranked 1-5 hold a value at first; sorting puts the largest of them
 * last among the non-missing rows, so carryforward copies it to every other row.
gen top5_cumpct = cum_pct_LB if rank_pct_LB <= 5
sort IndicatorRef top5_cumpct
by IndicatorRef: carryforward top5_cumpct, replace
 label variable top5_cumpct "Cumulative share of lives at stake accounted for by top 5 countries with most at stake"

save "output/wherepeople_LB.dta", replace


** Figure 8 Share of lives (and needs) at stake on each absolute indicator, by country
use "output/wherepeople_LB.dta", clear
drop if lives_calc == "no"
keep if t_categories == "absolute"

 * Mark which countries have data for which indicators
	* -9999 indicates country is on track for that indicator (SDG_onoff "pA" or "A")
	* negative value indicates country is not in top 5 but has lives/needs at stake
	* blank indicates no data
replace pct_LB = pct_LB * -1 if rank_pct_LB > 5 & rank_pct_LB != . & !inlist(SDG_onoff, "pA", "A")
replace pct_LB = -9999 if inlist(SDG_onoff, "pA", "A")
keep CountryName IndicatorRef pct_LB

 * Add indicator short names (short names with no matching indicator are
 * discarded); "." is replaced because the names become variable-name suffixes
merge m:1 IndicatorRef using "input\indicator_shortnames.dta", keepusing(numindicator_short)
drop if _merge == 2
drop _merge IndicatorRef
replace numindicator_short = subinstr(numindicator_short, ".", "__", .)
reshape wide pct_LB, i(CountryName) j(numindicator_short, string)
order CountryName pct_LB*, sequential

 * Keep only countries that account for one of the 5 largest shares of people
 * left behind for at least one indicator (if max < 0 then drop)
egen max_share = rowmax(pct_LB*)
keep if max_share >= 0 & max_share != .
drop max_share

 * Count times in top 5: one temporary flag per indicator column, summed
 * across the row. The flags are selected with a wildcard rather than by
 * naming the first and last indicator, so the count stays correct when the
 * set of exported indicators changes.
foreach var of varlist pct_LB* {
	gen t`var' = 1 if `var' > 0 & `var' != .
}
egen top5_count = rowtotal(tpct_LB*)
drop tpct_LB*
gsort -top5_count

export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("F8_top5share_abs")



** Figure 9 Share of lives (and needs) at stake on each relative indicator, by country
use "output/wherepeople_LB.dta", clear
drop if lives_calc == "no"
keep if t_categories == "relative"

 * Mark which countries have data for which indicators
	* -9999 indicates country is on track for that indicator (SDG_onoff "pA" or "A")
	* negative value indicates country is not in top 5 but has lives/needs at stake
	* blank indicates no data
replace pct_LB = pct_LB * -1 if rank_pct_LB > 5 & rank_pct_LB != . & !inlist(SDG_onoff, "pA", "A")
replace pct_LB = -9999 if inlist(SDG_onoff, "pA", "A")
keep CountryName IndicatorRef pct_LB 

 * Add indicator short names (short names with no matching indicator are
 * discarded); "." is replaced because the names become variable-name suffixes
merge m:1 IndicatorRef using "input\indicator_shortnames.dta", keepusing(numindicator_short)
drop if _merge == 2
drop _merge IndicatorRef
replace numindicator_short = subinstr(numindicator_short, ".", "__", .)
reshape wide pct_LB, i(CountryName) j(numindicator_short, string)
order CountryName pct_LB*, sequential

 * Keep only countries that account for one of the 5 largest shares of people
 * left behind for at least one indicator (if max < 0 then drop)
egen max_share = rowmax(pct_LB*)
keep if max_share >= 0 & max_share != .
drop max_share

 * Count times in top 5: one temporary flag per indicator column, summed
 * across the row. The flags are selected with a wildcard rather than by
 * naming the first and last indicator, so the count stays correct when the
 * set of exported indicators changes.
foreach var of varlist pct_LB* {
	gen t`var' = 1 if `var' > 0 & `var' != .
}
egen top5_count = rowtotal(tpct_LB*)
drop tpct_LB*
gsort -top5_count

export excel using "output\figures\figures.xlsm", sheetreplace firstrow(variables) sheet("F9_top5share_rel")

