**this is the file to calculate life expectancy at various ages for Brookings
**created on Nov 1, 2023 by asd

version 17.0
#delimit ;
clear;

tempfile temp temp2 tempold temp0 temp3 temp1 tempsmart;

**Input file cvd_can_sex has mortality rates by sex, year, age and EDclass;
**pslrate is the deaths of despair rate;
**cvdrate is the death rate from cardiovascular disease;
**Crate is the death rate for cancer;

**Step 1 builds the baseline cause specific rates, one row per age, sex and EDclass;
**The baseline is the mean over every row with year at or before 1992;
**NOTE(review) this equals the 1992 level only if the data start in 1992 - confirm;

use sex year age EDclass pslrate cvdrate Crate using cvd_can_sex;
keep if year<=1992;
collapse (mean) psl0=pslrate cvd0=cvdrate can0=Crate, by(age sex EDclass);
save `temp0';

**Step 2 reloads the full panel and attaches the baseline rates to every year;

use sex year age EDclass pslrate Trate COVrate Crate cvdrate using cvd_can_sex, clear;
merge m:1 sex age EDclass using `temp0', nogenerate;

**Trate is the overall death rate;
**Note COVID death rate is zero in 1992;
**Each adjustment below is a text fragment that is pasted after Trate;
**A swap removes the current cause specific rate and puts back its baseline value;
**The fragments are always pasted in the order psl, cvd, can, cov;

local swap_psl "-pslrate+psl0";
local swap_can "-Crate+can0";
local swap_cvd "-cvdrate+cvd0";
local drop_cov "-COVrate";

**The digits in each name say which adjustments are applied;
**1 is COVID removed, 2 is deaths of despair, 3 is cancer, 4 is cardiovascular, 0 is none;

gen m0=Trate;
gen m1=Trate`drop_cov';
gen m2=Trate`swap_psl';
gen m3=Trate`swap_can';
gen m4=Trate`swap_cvd';
gen m12=Trate`swap_psl'`drop_cov';
gen m24=Trate`swap_psl'`swap_cvd';
gen m124=Trate`swap_psl'`swap_cvd'`drop_cov';
gen m234=Trate`swap_psl'`swap_cvd'`swap_can';
gen m1234=Trate`swap_psl'`swap_cvd'`swap_can'`drop_cov';

**Only the identifiers and the adjusted rates are carried forward;

keep sex year age EDclass m0-m1234;

save mrates, replace;

**ems lists the suffixes of the mortality rate hypotheticals created above;

local ems "0 1 2 3 4 12 24 124 234 1234";

drop _all;

**Next bit does extrapolations for the elderly, where we don't want to use the death certificates;
**This is done separately by three education groups, 0 (all), 1 (no BA), 3 (BA or more) and by sex;
** sex==0,1,2 where 0 is all, 1 is males, 2 is females;

foreach mort of local ems {;

**Build an age by year grid that runs from age 85 to age 120 for every year 1992 to 2021;
**It is used to complete the life table for each education and sex group;
**Each pass makes the 36 rows for one year and stacks the years saved so far underneath;

forvalues y=1992(1)2021 {;
	drop _all;
	set obs 36;
	gen age=_n+84;
	gen year=`y';
	dis("`y'");
	if `y'>1992 {;
		append using `tempold';
	};
	save `tempold', replace;
};

**The finished grid is still in memory here. lno is the regression outcome and is missing on the grid;
gen lno=.;
save `tempold', replace;

**Fit and extrapolate separately for each EDclass in ees and each sex code 0, 1, 2;
**icc counts the groups done so far so that the first group starts the stacked file;
local ees "0 1 3";
local icc=1;
foreach e of local ees {;
	forvalues mf=0(1)2 {;
		drop _all;
		use mrates;
		keep if EDclass==`e' & sex==`mf';
		keep age m`mort' year;
		rename m`mort' m;

		**lno is the log odds of the selected mortality rate;
		gen lno=ln(m/(1-m));
		drop m;
		**temp2 is written here but is not read again in the visible code;
		save `temp2', replace;

		**we need it to go to 120;
		append using `tempold';

		**after the append the data hold the observed rows plus the grid rows for ages 85 to 120;
		gen age_2=age^2;
		gen age_3=age^3;

		**Three fits on ages 70 to 84 with year effects, adding one power of age each time;
		**lnoh1 is linear in age, lnoh2 adds age squared, lnoh3 adds age cubed;
		**predict fills every row, including the grid rows outside the estimation sample;
		local rhs "";
		local k=1;
		foreach term in age age_2 age_3 {;
			local rhs "`rhs' `term'";
			regress lno i.year `rhs' if age>=70 & age <=84;
			predict lnoh`k';
			local k=`k'+1;
		};

		**Keep only the old ages and turn the predicted log odds back into rates;
		keep if age>=85;
		forvalues k=1(1)3 {;
			gen mh`k'=exp(lnoh`k')/(1+exp(lnoh`k'));
		};
		gen EDclass=`e';
		gen sex=`mf';
		drop lno* age_2 age_3;

		**Stack this group on top of the groups already saved;
		if `icc'>1 {;
			append using `tempsmart';
		};
		save `tempsmart', replace;
		local icc=`icc'+1;
	};
};

/*
**that is the extension for old ages done!;

**the code below uses standard demographic methods to calculate "temporary" life expectancy, the
**expected years lived between the 25th and 85th birthdays, for each sex and education group and
**each year, using the unadjusted and adjusted mortality rates as inputs


The code below calculates (more than) the life expectancy numbers that are used in the paper.
They are all for adults, starting at age 25. The code makes a series of datasets whose names all
begin with le25_ followed by the number that refers to the causes that have been modified, so 0
means the raw data. The numbers are listed in local ems above, with each corresponding to holding
constant one or more causes of death.

Within each of the datasets everything is done by sex, EDclass, and year. e25 and e25_60, which
are life expectancy at 25 and life expectancy from the 25th to the 85th birthday, are the two
quantities that are used in the paper. The rest are not used and are irrelevant for replication
purposes. They are left there to avoid the possibility of introducing errors by altering the
original code.
*/


**Life tables for one mortality scenario, one per year, EDclass and sex;
**Results are stacked one row per group and saved as le25_ plus the scenario suffix;
**apx picks which extrapolated old age rate to use. mh3 comes from the fit with age, age squared and age cubed;
**NOTE(review) the fixed row numbers below assume each group has exactly 96 rows, ages 25 to 120,
  which needs mrates to hold ages 25 to 84 only - confirm against the data;

local ees "0 1 3";
local apx "mh3";

**icc counts the groups done so far so that the first group starts the output file;
local icc=1;
forvalues y=1992(1)2021 {;
	foreach ee of local ees {;
		forvalues mf=0(1)2 {;

			**this brings in the older mortality rates;
			use `tempsmart';

			keep if EDclass==`ee' & year==`y' & sex==`mf';

			keep age `apx';
			rename `apx' m;
			save `temp1', replace;

			list;


			**now the observed rates for the same group, with the extrapolated ages appended;
			drop _all;
			use mrates;
			keep if EDclass==`ee' & year==`y' & sex==`mf';
			display("`mf' `ee' `y'");
			rename m`mort' m;
			keep age m;

			append using `temp1';

			**everything below addresses rows by position, so put the rows in age order first;
			**this is a no-op when mrates already arrives in age order;
			sort age;

			**q is the death probability implied by the rate m;
			**I is the number of survivors at the start of each row, starting from 100000 in row 1;
			gen q=m/(1+m/2);
			gen I=100000;
			forvalues i=2(1)96 {;
				local mm=I[`i'-1]*(1-q[`i'-1]);
				qui replace I=`mm' in `i';
			};

			**d is deaths in the row and L is years lived in the row;
			gen d=I*q;
			gen L=I-d/2;

			**rows 76 to 96 are folded into row 76, which becomes the open ended last interval;
			summ L in 76/96;
			replace L=r(sum) in 76;
			drop in 77/96;

			**T is years lived in this row and all later rows, built upwards from row 76;
			gen T=L[76];

			forvalues i=1(1)75 {;
				local mm=L[76-`i'];
				local id=76-`i';
				qui replace T=T[77-`i']+`mm' in `id';
			};

			**e is remaining life expectancy at the start of each row;
			gen e=T/I;

			**LL is the running sum of L from the first row, per person in the starting 100000;
			sort age;
			gen LL=sum(L);
			replace LL=LL/100000;

			**move the columns into row vectors so they survive the drop below;
			mkmat LL;
			mkmat T;
			mkmat I;
			matrix I=I';
			matrix T=T';
			matrix LL=LL';

			**rows 1, 11, 21, 31, 41, 51 are read as ages 25, 35, 45, 55, 65, 75;
			local e25=e[1];
			local e35=e[11];
			local e45=e[21];
			local e55=e[31];
			local e65=e[41];
			local e75=e[51];
			drop _all;
			set obs 1;
			**Dropped everything and now all in matrices;
			forvalues d=25(10)75 {;
				gen e`d'=`e`d'';
			};

			**this is now a data set with just the six life expectancies;
			**what is below is calculations of temporary life expectancies;
			**indeed two different ways as a cross-check;
			**LL is sum of years lived from the bottom in each age category;
			**Alternative method just uses T which I understand better;

			**svmat turns each vector into variables LL1 to LL76, T1 to T76 and I1 to I76;
			svmat LL;
			svmat T;
			svmat I;

			**e25_j is years lived in the first j rows per person in the starting 100000;
			forvalues j=10(10)60 {;
				local ff=LL`j'[1];
				gen e25_`j'=`ff';
			};

			**tm_e_a_n is years lived in the n rows from age a, per survivor at age a;
			gen tm_e_25_10=100000*LL10[1]/I1[1];
			gen tm_e_35_10=100000*(LL20[1]-LL10[1])/I11[1];
			gen tm_e_45_10=100000*(LL30[1]-LL20[1])/I21[1];
			gen tm_e_55_10=100000*(LL40[1]-LL30[1])/I31[1];
			gen tm_e_65_20=100000*(LL60[1]-LL40[1])/I41[1];

			**the same quantities again, this time from differences in T;
			forvalues j=11(10)61 {;
				local ff=(T1[1]-T`j'[1])/100000;
				local j1=`j'-1;
				gen e25_`j1'_x=`ff';
			};
			gen tmx_e_25_10=(T1[1]-T11[1])/I1[1];
			gen tmx_e_35_10=(T11[1]-T21[1])/I11[1];
			gen tmx_e_45_10=(T21[1]-T31[1])/I21[1];
			gen tmx_e_55_10=(T31[1]-T41[1])/I31[1];
			gen tmx_e_65_20=(T41[1]-T61[1])/I41[1];


			**drop the helper columns, tag the row with its group and stack it on the output file;
			drop LL*;
			drop T*;
			drop I*;
			gen EDclass=`ee';
			gen year=`y';
			gen sex=`mf';
			if `icc'>1 {;
				append using le25_`mort';
			};
			save le25_`mort', replace;
			local icc=`icc'+1;
		};
	};
};

};


