* Drop whatever dataset is currently in memory so the script starts from a clean state
clear
* Turn off --more-- paging so the do-file runs through without waiting for a keypress
set more off

****************************************************************************
****************************************************************************
**    100% INDICATORS
** primary completion rate * primary completion rate aggregate * gender parity (primary, secondary, tertiary)

** John W. McArthur & Krista Rasmussen
** January 2017
****************************************************************************
****************************************************************************

****************************************************************************
***PRIMARY SCHOOL COMPLETION RATE
****************************************************************************
*I. Rate of progress and counterfactual calculations
	*A. Pre-MDG average annual percentage point rate of progress 
	*B. Post-MDG average annual percentage point rate of progress
	*C. Difference in rate of progress
	*D. Extrapolate pre-MDG rate of progress to find counterfactual
*II. Country count acceleration test
	*A. Acceleration test - country count
****************************************************************************

***************************************************
**I. RATE OF PROGRESS AND COUNTERFACTUAL CALCULATIONS
***************************************************

* File stems handled by this section; each is read from <stem>_clean and written to <stem>_rpfull
local lista "primary_completion_85"

foreach p of local lista {
    use "`p'_clean", clear

    * Reported completion rates can exceed 100 percent; cap every yearly value at 1
    foreach yrvar of varlist n1990-n2015 {
        replace `yrvar' = 1 if `yrvar' > 1 & `yrvar'!=.
    }

    **************************************
    * A * Pre-MDG average annual percentage point change
    **************************************
    * Countries differ in which years have data, so no single window fits all.
    * Endpoint preference is 2000, then 2001, then 1999; for each endpoint the
    * starting year is the earliest available of 1990 through 1995.
    * The first combination that yields a non-missing rate is kept.
    gen rppre = .
    foreach endyr in 2000 2001 1999 {
        forvalues startyr = 1990/1995 {
            local span = `endyr' - `startyr'
            replace rppre = (n`endyr' - n`startyr')/`span' if rppre==.
        }
    }

    * Keep an unfiltered copy, then blank the rate for countries whose baseline value
    * (2000, else 2001, else 1999) is already at or above 99 percent: they were not the
    * target of this MDG and have no practical room to accelerate
    gen all_rppre = rppre
    replace rppre = . if (n2000>=.99 & n2000!=.) | (n2000==. & n2001>=.99 & n2001!=.) | (n2000==. & n2001==. & n1999>=.99 & n1999!=.)

    **************************************
    * B * Post-MDG average annual percentage point change
    **************************************
    * Baseline preference is 2000, then 2001, then 1999; for each baseline the
    * final year is the most recent available from 2015 back to 2010.
    gen rppost = .
    foreach baseyr in 2000 2001 1999 {
        forvalues endyr = 2015(-1)2010 {
            local span = `endyr' - `baseyr'
            replace rppost = (n`endyr' - n`baseyr')/`span' if rppost==.
        }
    }

    * Countries that reached 100 percent before their final observation should have the
    * rate measured up to the year they first reached it, not up to the final year.

    * Step 1: latest available value, searching from 2015 back to 2010
    gen fulln2015 = n2015
    forvalues yr = 2014(-1)2010 {
        replace fulln2015 = n`yr' if fulln2015==.
    }

    * Step 2: when that latest value equals 1, recompute the rate up to each year 2002-2014
    * whose value equals 1. Years are visited latest first, so the earliest such year is the
    * one that remains. The 2001 baseline applies only when 2000 is missing, and the 1999
    * baseline only when both 2000 and 2001 are missing.
    local guard2000 ""
    local guard2001 "n2000==. &"
    local guard1999 "n2000==. & n2001==. &"
    foreach baseyr in 2000 2001 1999 {
        forvalues hityr = 2014(-1)2002 {
            local span = `hityr' - `baseyr'
            replace rppost = (n`hityr' - n`baseyr')/`span' if `guard`baseyr'' fulln2015==1 & fulln2015!=. & n`hityr'==1 & n`hityr'!=.
        }
    }

    * Unfiltered copy first, then the same 99 percent baseline exclusion as for the pre rate
    gen all_rppost = rppost
    replace rppost = . if (n2000>=.99 & n2000!=.) | (n2000==. & n2001>=.99 & n2001!=.) | (n2000==. & n2001==. & n1999>=.99 & n1999!=.)

    **************************************
    * C * Difference in rate of progress, post minus pre
    **************************************
    gen difrppre_post = rppost-rppre
    label var difrppre_post "dif in rate of progress, (post)-(pre)"

    * 1 when both rates exist for the country, 0 otherwise
    gen fulldatapre_post = (rppre!=. & rppost!=.)
    label var fulldatapre_post "# countries with data for all years pre post"

    **************************************
    * D * Counterfactual for 2001-2015 at the pre-MDG rate
    **************************************
    * Extrapolate linearly from the baseline observation (2000, else 2001, else 1999)
    * and cap the projection at 1
    forvalues yr = 2001/2015 {
        gen cfpre_`yr' = n2000+(rppre*(`yr' - 2000)) if rppre!=.
        replace cfpre_`yr' = n2001+(rppre*(`yr' - 2001)) if rppre!=. & n2000==.
        replace cfpre_`yr' = n1999+(rppre*(`yr' - 1999)) if rppre!=. & n2000==. & n2001==.
        replace cfpre_`yr' = 1 if cfpre_`yr' > 1 & cfpre_`yr'!=.
        label variable cfpre_`yr' "`p' `yr' extrapolation from pre rates"
    }

    save "`p'_rpfull", replace
}

***************************************************
**II. COUNTRY COUNT ACCELERATION TEST  
***************************************************

* File stems handled by this section; each is read from <stem>_rpfull and written to <stem>_analysis
local lista "primary_completion_85"

foreach p of local lista {
    use "`p'_rpfull", clear

    ** Baseline helper values: 1990 falls forward to the first available year through 1995,
    ** and 2000 falls back to 2001 and then 1999
    gen fulln1990 = n1990
     replace fulln1990 = n1991 if n1990==.
     replace fulln1990 = n1992 if n1990==. & n1991==.
     replace fulln1990 = n1993 if n1990==. & n1991==. & n1992==.
     replace fulln1990 = n1994 if n1990==. & n1991==. & n1992==. & n1993==.
     replace fulln1990 = n1995 if n1990==. & n1991==. & n1992==. & n1993==. & n1994==.

    gen fulln2000 = n2000
     replace fulln2000 = n2001 if n2000==.
     replace fulln2000 = n1999 if n2000==. & n2001==.

    **************************************************************************************
    * A * Acceleration test - simple country count
    **************************************************************************************
    * (i) Dummy for any acceleration between the pre and post rates of progress.
    * Missing compares as greater than zero in Stata, so the second replace resets
    * countries without a difference back to missing.
    gen difrppre_post_any = 0
     replace difrppre_post_any = 1 if difrppre_post>0
     replace difrppre_post_any = . if difrppre_post==.
     * Countries whose post rate is zero or negative are not counted as accelerating,
     * even when they are declining more slowly than before
     replace difrppre_post_any = 0 if rppost<=0 & difrppre_post!=.

    * (ii) Dummy for an acceleration of at least .0033 per year (.33 percentage points)
    gen difrppre_post_1 = 0
     replace difrppre_post_1 = 1 if difrppre_post >=.0033
     replace difrppre_post_1 = . if difrppre_post==.
     replace difrppre_post_1 = 0 if rppost<=0 & difrppre_post!=.
    label variable difrppre_post_1 "rate of progress in post is .33 pct point greater than pre"

    ** Due to missing data at the country level, only the WB aggregate is used to calculate lives affected
    * Output name is derived from the loop stem so that adding a stem to lista cannot
    * overwrite an earlier result; for the current list this is primary_completion_85_analysis
    save "`p'_analysis", replace
}




****************************************************************************
***PRIMARY SCHOOL COMPLETION RATE - AGGREGATE BY REGION AND DEVELOPING COUNTRY
****************************************************************************
*I. Rate of progress and counterfactual calculations
	*A. Rate of progress, average annual percentage point Pre- and Post-MDG
	*B. Difference in rate of progress
	*C. Extrapolate pre-MDG rate of progress to find counterfactual
*II. Acceleration test and lives improved
	*A. Acceleration test 
	*B. Lives improved compared to BAU trajectory
****************************************************************************

***************************************************
**I. RATE OF PROGRESS AND COUNTERFACTUAL CALCULATIONS
***************************************************

* Aggregate (region and developing-country) file; output is written to <name>_rpfull
local lista "primary_completion_ag_clean"

foreach p of local lista {
    use "`p'", clear

    * Reported completion rates can exceed 100 percent; cap every yearly value at 1
    foreach yrvar of varlist n1990-n2015 {
        replace `yrvar' = 1 if `yrvar' > 1 & `yrvar'!=.
    }

    **************************************
    * A * Rate of progress, pre and post
    **************************************
    * Pre: two alternative windows, 1990-2000 and 1996-2001
    gen rp9000 = (n2000-n1990)/10
    gen rp9601 = (n2001-n1996)/5

    * Post: 2000 to 2013; when 2013 is missing step back one year at a time to 2010.
    * The condition grows with each step so a year is used only if every later year is missing.
    gen rp0013 = (n2013-n2000)/13
    local laterMissing "n2013==."
    forvalues yr = 2012(-1)2010 {
        local span = `yr' - 2000
        replace rp0013 = (n`yr'-n2000)/`span' if `laterMissing'
        local laterMissing "`laterMissing' & n`yr'==."
    }

    **************************************
    * B * Difference in rate of progress, 2000-2013 minus 1990-2000
    **************************************
    gen difrp9000_0013 = rp0013-rp9000
    label var difrp9000_0013 "dif in rate of progress, (0013)-(9000)"

    * 1 when both rates exist for the geography, 0 otherwise
    gen fulldata9000_0013 = (rp9000!=. & rp0013!=.)
    label var fulldata9000_0013 "# geographies with data for all years 9000 0013"

    **************************************
    * C * Counterfactuals through 2013 at each pre-MDG rate
    **************************************
    * Each entry pairs a pre-rate suffix with the year its extrapolation starts from:
    * 9000 projects forward from the 2000 value, 9601 from the 2001 value.
    * Projections begin the year after the base year and are capped at 1.
    foreach spec in "9000 2000" "9601 2001" {
        local pre : word 1 of `spec'
        local base : word 2 of `spec'
        local first = `base' + 1
        forvalues yr = `first'/2013 {
            gen cf`pre'_`yr' = n`base'+(rp`pre'*(`yr' - `base')) if rp`pre'!=.
            replace cf`pre'_`yr' = 1 if cf`pre'_`yr' > 1 & cf`pre'_`yr'!=.
            label variable cf`pre'_`yr' "`p' `yr' extrapolation from `pre' rates"
        }
    }

    save "`p'_rpfull", replace
}

***************************************************
**II. ACCELERATION TEST AND LIVES IMPROVED 
***************************************************

* Load the aggregate file with rates of progress and counterfactuals already attached
use "primary_completion_ag_clean_rpfull.dta", clear

**************************************************
 * A * Acceleration Test
**************************************************
* Dummy for any acceleration from 1990-2000 to 2000-2013. Missing compares as greater
* than zero in Stata, so the second replace resets rows without a difference to missing;
* rows whose post rate is zero or negative are never counted as accelerating.
gen difrp9000_0013_any = 0
 replace difrp9000_0013_any = 1 if difrp9000_0013>0
 replace difrp9000_0013_any = . if difrp9000_0013==.
 replace difrp9000_0013_any = 0 if rp0013<=0 & difrp9000_0013!=.

* Dummy for an acceleration of at least .0033 per year (.33 percentage points)
gen difrp9000_0013_1 = 0
 replace difrp9000_0013_1 = 1 if difrp9000_0013 >=.0033
 replace difrp9000_0013_1 = . if difrp9000_0013==.
 replace difrp9000_0013_1 = 0 if rp0013<=0 & difrp9000_0013!=.
label variable difrp9000_0013_1 "rate of progress in 0013 .33 pct point greater than pre"

**************************************************
 * B * Lives improved compared to BAU trajectory using counterfactual A and B
**************************************************
* NOTE(review): primaryYYYY is presumably the relevant child population for that year;
* confirm against the cleaning script.
* Each family of variables is generated in its own loop on purpose: the rowtotal calls
* below use first-last varlist ranges, which depend on the variables being adjacent in
* the dataset.
* Variable labels are limited to 80 characters, so the lives-affected labels are worded
* to keep the reference period inside that limit.

* Counterfactual A: estimated number of children completing primary school at 1990-2000 rates
forvalues yr = 2001/2013 {
    gen xprimary_cf9000_`yr' = cf9000_`yr' * primary`yr'
    label variable xprimary_cf9000_`yr' "Counterfactual A: estimated `yr' children completed primary"
}

* Counterfactual A: actual completers minus counterfactual completers, by year
forvalues yr = 2001/2013 {
    gen xaff_cf9000_`yr' = (n`yr' * primary`yr' ) - xprimary_cf9000_`yr'
    label variable xaff_cf9000_`yr' "Counterfactual A: est. lives affected in `yr', from pre-MDG (~1990-~2000) rates"
}

** Cumulative difference between actual and counterfactual completers, 2001-2013
* NOTE(review): rowtotal counts missing years as zero, so a row with no data at all
* totals 0 rather than missing
egen xaff_cf9000_total = rowtotal(xaff_cf9000_2001-xaff_cf9000_2013)

* Counterfactual B: estimated number of children completing primary school at 1996-2001 rates
forvalues yr = 2002/2013 {
    gen xprimary_cf9601_`yr' = cf9601_`yr' * primary`yr'
    label variable xprimary_cf9601_`yr' "Counterfactual B: estimated `yr' children completed primary"
}

* Counterfactual B: actual completers minus counterfactual completers, by year
forvalues yr = 2002/2013 {
    gen xaff_cf9601_`yr' = (n`yr' * primary`yr' ) - xprimary_cf9601_`yr'
    label variable xaff_cf9601_`yr' "Counterfactual B: est. lives affected in `yr', from pre-MDG (~1996-~2001) rates"
}

** Cumulative difference between actual and counterfactual completers, 2002-2013
egen xaff_cf9601_total = rowtotal(xaff_cf9601_2002-xaff_cf9601_2013)

save "primary_completion_ag_clean_analysis", replace





****************************************************************************
***GENDER PARITY RATIOS - PRIMARY, SECONDARY, TERTIARY
****************************************************************************
*I. Rate of progress and counterfactual calculations
	*A. Pre-MDG average annual rate of change
	*B. Post-MDG average annual rate of change
	*C. Difference in rate of progress
	*D. Extrapolate pre-MDG rate of progress to find counterfactual
*II. Country count acceleration test
	*A. Acceleration test 
****************************************************************************

***************************************************
**I. RATE OF PROGRESS AND COUNTERFACTUAL CALCULATIONS
***************************************************
* File stems handled by this section; each is read from <stem>_clean and written to <stem>_rpfull
local lista "genderratio_primary_85 genderratio_secondary_85 genderratio_tertiary_85"
foreach p of local lista {

    use "`p'_clean", clear

    * The indicator is the female gross enrollment ratio divided by the male ratio.
    * A value below 1 means girls are at a disadvantage; values above 1 (favoring girls)
    * are capped at 1.
    foreach yrvar of varlist n1990-n2015 {
        replace `yrvar' = 1 if `yrvar' > 1 & `yrvar'!=.
    }

    **************************************
    * A * Pre-MDG average annual rate of change - percentage point
    **************************************
    * Countries differ in which years have data, so no single window fits all.
    * Endpoint preference is 2000, then 2001, then 1999; for each endpoint the
    * starting year is the earliest available of 1990 through 1995.
    * The first combination that yields a non-missing rate is kept.
    gen rppre = .
    foreach endyr in 2000 2001 1999 {
        forvalues startyr = 1990/1995 {
            local span = `endyr' - `startyr'
            replace rppre = (n`endyr' - n`startyr')/`span' if rppre==.
        }
    }

    * The UN defines success as 0.97. Keep an unfiltered copy, then blank the rate for
    * countries whose baseline value (2000, else 2001, else 1999) is at or above .96
    gen all_rppre = rppre
    replace rppre = . if (n2000>=.96 & n2000!=.) | (n2000==. & n2001>=.96 & n2001!=.) | (n2000==. & n2001==. & n1999>=.96 & n1999!=.)

    **************************************
    * B * Post-MDG average annual rate of change - percentage point
    **************************************
    * The most recent data at the time of writing is for 2014; the code also works once
    * 2015 data is released.
    * Baseline preference is 2000, then 2001, then 1999; for each baseline the
    * final year is the most recent available from 2015 back to 2010.
    gen rppost = .
    foreach baseyr in 2000 2001 1999 {
        forvalues endyr = 2015(-1)2010 {
            local span = `endyr' - `baseyr'
            replace rppost = (n`endyr' - n`baseyr')/`span' if rppost==.
        }
    }

    * Countries that reached .97 before their final observation should have the rate
    * measured up to the year they first reached it, not up to the final year.

    * Step 1: latest available value, searching from 2015 back to 2010
    gen fulln2015 = n2015
    forvalues yr = 2014(-1)2010 {
        replace fulln2015 = n`yr' if fulln2015==.
    }

    * Step 2: when that latest value is at least .97, recompute the rate up to each year
    * 2002-2014 whose value is at least .97. Years are visited latest first, so the earliest
    * such year is the one that remains. The 2001 baseline applies only when 2000 is missing,
    * and the 1999 baseline only when both 2000 and 2001 are missing.
    local guard2000 ""
    local guard2001 "n2000==. &"
    local guard1999 "n2000==. & n2001==. &"
    foreach baseyr in 2000 2001 1999 {
        forvalues hityr = 2014(-1)2002 {
            local span = `hityr' - `baseyr'
            replace rppost = (n`hityr' - n`baseyr')/`span' if `guard`baseyr'' fulln2015>=.97 & fulln2015!=. & n`hityr'>=.97 & n`hityr'!=.
        }
    }

    * Unfiltered copy first, then the same .96 baseline exclusion as for the pre rate
    gen all_rppost = rppost
    replace rppost = . if (n2000>=.96 & n2000!=.) | (n2000==. & n2001>=.96 & n2001!=.) | (n2000==. & n2001==. & n1999>=.96 & n1999!=.)

    **************************************
    * C * Difference in rate of progress, post minus pre
    **************************************
    gen difrppre_post = rppost-rppre
    label var difrppre_post "dif in rate of progress, (post)-(pre)"

    * 1 when both rates exist for the country, 0 otherwise
    gen fulldatapre_post = (rppre!=. & rppost!=.)
    label var fulldatapre_post "# countries with data for all years pre post "

    **************************************
    * D * Counterfactual for 2001-2015 at the pre-MDG rate
    **************************************
    * Extrapolate linearly from the baseline observation (2000, else 2001, else 1999)
    * and cap the projection at 1
    forvalues yr = 2001/2015 {
        gen cfpre_`yr' = n2000+(rppre*(`yr' - 2000)) if rppre!=.
        replace cfpre_`yr' = n2001+(rppre*(`yr' - 2001)) if n2000==. & rppre!=.
        replace cfpre_`yr' = n1999+(rppre*(`yr' - 1999)) if n2000==. & n2001==. & rppre!=.
        replace cfpre_`yr' = 1 if cfpre_`yr' > 1 & cfpre_`yr'!=.
        label variable cfpre_`yr' "`p' `yr' extrapolation from pre rates"
    }

    save "`p'_rpfull", replace
}

***************************************************
**II. COUNTRY COUNT ACCELERATION TEST  
***************************************************

* File stems handled by this section; each is read from <stem>_rpfull and written to <stem>_analysis
local lista "genderratio_primary_85 genderratio_secondary_85 genderratio_tertiary_85"
foreach p of local lista {

    use "`p'_rpfull", clear

    ** Baseline helper values: 2000 falls back to 2001 and then 1999,
    ** and 1990 falls forward to the first available year through 1995
    gen fulln2000 = n2000
     replace fulln2000 = n2001 if n2000 ==.
     replace fulln2000 = n1999 if n2000 ==. & n2001==.

    gen fulln1990 = n1990
     replace fulln1990 = n1991 if n1990==.
     replace fulln1990 = n1992 if n1990==. & n1991==.
     replace fulln1990 = n1993 if n1990==. & n1991==. & n1992==.
     replace fulln1990 = n1994 if n1990==. & n1991==. & n1992==. & n1993==.
     replace fulln1990 = n1995 if n1990==. & n1991==. & n1992==. & n1993==. & n1994==.

    **************************************************************************************
    * A * Acceleration test - simple country count
    **************************************************************************************
    * (i) Dummy for any acceleration between the pre and post rates of progress.
    * Missing compares as greater than zero in Stata, so the second replace resets
    * countries without a difference back to missing.
    gen difrppre_post_any = 0
     replace difrppre_post_any = 1 if difrppre_post>0
     replace difrppre_post_any = . if difrppre_post==.
     * Countries whose post rate is zero or negative are not counted as accelerating,
     * even when they are declining more slowly than before
     replace difrppre_post_any = 0 if rppost<=0 & difrppre_post!=.

    * (ii) Dummy for an acceleration of at least .0033 per year (.33 percentage points)
    gen difrppre_post_1 = 0
     replace difrppre_post_1 = 1 if difrppre_post >=.0033
     replace difrppre_post_1 = . if difrppre_post==.
     replace difrppre_post_1 = 0 if rppost<=0 & difrppre_post!=.
    * The label names the post period, since the variable compares rppost with rppre
    label variable difrppre_post_1 "rate of progress in post is .33 pct point greater than pre"

    ** Due to the nature of the indicator and missing data, no lives-improved calculation is attempted
    save "`p'_analysis", replace
}


