***********************************************************
* Labor Income Dynamics project
* 
***********************************************************
* This program reads in hhinc_LT_data.dta, which contains records 
* from the 1987-96 family panel and the 1997-2009 insoles.  
*
* It then calculates descriptive statistics from these data
*
*************************************************************
#delimit;

* Reset the session: drop data, user-defined programs and matrices left over from earlier work;
clear;
program drop _all;
clear matrix;

* Resource and display settings;
* NOTE(review): set memory is presumably only needed on older Stata releases - confirm the target version;
set matsize 1000;
set memory 4000m;
set more off;

* Close a log left open by a previous run - capture suppresses the error when none is open;
cap log close;


* Project folders.  The root is declared once and every other location is derived from it so that ;
* moving the project only requires changing one line ;
local projroot "G:\%Office of Tax Analysis\_Individual Staff\Ramnath\Tax Research\BPEA\LaborInc\BPEA" ;
local datapath "`projroot'\data" ;
local logpath "`projroot'\log_files" ;
local graphpath "`projroot'\graphs" ;
local outputpath "`projroot'\output" ;


log using "`logpath'\hhinc_LT-samplestats.log", replace;

* Work from the tempfiles folder - graphs and intermediate datasets saved without a path land here;
cd "`projroot'\tempfiles";

*** Open data file ***;
use "`datapath'\hhinc_LT_data.dta", clear;
compress;

* Confidence intervals for every variable in the file;
ci;


* summarize;
sum age year;
ci hhinc;

* Describe the panel pattern - this requires the panel settings to be stored in the data file;
xtdes;




****** Sample descriptive statistics ****************;

* One-way frequency tables: number of observations per year, then the age distribution;
tabulate year;
tabulate age;

* Two-way table: age distribution by year;
tabulate age year;


****** Income descriptive statistics ****************;

* Pooled-sample distribution of lnhhinc: a histogram and a kernel density, each saved as a .gph file ;
* in the current working directory ;
hist lnhhinc, bin(50) title(Household Income Histogram) xtitle(Labor Income) saving(hhinc1, replace);

kdensity lnhhinc, title(Household Income Kernel Density) xtitle(Labor Income) saving(hhinc2, replace);

* Combine the two saved graphs and export the combined figure to the graphs folder;
graph combine hhinc1.gph hhinc2.gph, saving(hhinccombined, replace);
graph export "`graphpath'\hhinc_LTcombined.wmf", replace;

* Summary statistics of pooled sample;
sum lnhhinc, detail;

* Evolution of sample average by year (with number of observations);
table year, c(m lnhhinc n lnhhinc);

* Snapshot the full sample before collapsing it.  The snapshot number assigned by Stata is kept in a ;
* local instead of being assumed to be 1, because snapshots left over from earlier work in the same ;
* session shift the numbering and restoring a fixed number would then load the wrong data ;
snapshot save, label("predropsmpl");
local fullsmpl = r(snapshot);

* Year-level mean and observation count, written out as a text file;
collapse (mean) mean_lnhhinc=lnhhinc (count) lnhhinc, by(year) ;
outsheet using "`outputpath'\hhinc_LT_mean_yr.txt", replace ;

* Bring the full sample back;
snapshot restore `fullsmpl';

****** Evolution of Income Inequality Measures ****************;

* Each measure is first displayed with table and then written out as a year-level text file ;
* preserve and restore bracket every collapse so that the full sample comes back afterwards ;
* without depending on which number a previously saved snapshot happened to receive ;

* Evolution of Cross sectional variance (reported as the standard deviation of lnhhinc);
table year, c(sd lnhhinc);

preserve;
collapse (sd) sd_lnhhinc=lnhhinc, by(year) ;
outsheet using "`outputpath'\hhinc_LT_sd_yr.txt", replace ;
restore;

* Evolution of Coefficient of Variation ;
* Only the yearly mean and standard deviation are exported - the ratio itself is not computed here ;
table year, c(sd lnhhinc m lnhhinc);

preserve;
collapse (mean) mean_lnhhinc=lnhhinc (sd) sd_lnhhinc=lnhhinc, by(year) ;
outsheet using "`outputpath'\hhinc_LT_cv_yr.txt", replace ;
restore;

* Evolution of log percentile ratios ;
* The 90th, 50th and 10th percentiles of lnhhinc are exported - the ratios are not computed here ;
table year, c(p90 lnhhinc p50 lnhhinc p10 lnhhinc);

preserve;
collapse (p90) p90_lnhhinc=lnhhinc (p50) p50_lnhhinc=lnhhinc (p10) p10_lnhhinc=lnhhinc, by(year) ;
outsheet using "`outputpath'\hhinc_LT_pct_yr.txt", replace ;
restore;

* Evolution of share of income;

* Both parts below assign every observation to a within-year quantile of hhinc and then total hhincM ;
* by year and quantile group.  The yearly xtile result goes into a temporary variable that is dropped ;
* on each pass, so the loops no longer leave one scratch variable per year in memory.  preserve and ;
* restore bring the full sample back without relying on a fixed snapshot number ;

* by quintile;

preserve;

* Group stays 0 only for observations outside 1987-2009 - within range it is the xtile result ;
gen hhincquint = 0;
forvalues y = 1987(1)2009 {;
	tempvar yrquint;
	xtile `yrquint' = hhinc if year == `y', nquantiles(5);
	replace hhincquint = `yrquint' if year == `y';
	drop `yrquint';
};

table year hhincquint, c(sum hhincM);

collapse (sum) hhincM, by(year hhincquint) ;
outsheet using "`outputpath'\hhinc_LT_quint_yr.txt", replace ;
restore;


* high quantiles;

preserve;

* generate percentiles;
gen hhincquant = 0;
forvalues y = 1987(1)2009 {;
	tempvar yrpct;
	xtile `yrpct' = hhinc if year == `y', nquantiles(100);
	replace hhincquant = `yrpct' if year == `y';
	drop `yrpct';
};

* collapse into quantiles: percentiles 96-99 are coded 96, 91-95 are coded 91, everything up to 90 ;
* is coded 1, and percentile 100 keeps the code 100 ;
replace hhincquant = 96 if hhincquant >= 96 & hhincquant <= 99;
replace hhincquant = 91 if hhincquant >= 91 & hhincquant <= 95;
replace hhincquant = 1 if hhincquant <= 90;

table year hhincquant, c(sum hhincM);

collapse (sum) hhincM, by(year hhincquant) ;
outsheet using "`outputpath'\hhinc_LT_quant_yr.txt", replace ;
restore;

* Evolution of gini coefficient;

* inequal7 is not an official Stata command and has to be installed separately.  It is run once per ;
* year with the returnscalars option and the value it leaves in r(gini) is copied into that year ;
* preserve and restore bring the full sample back without relying on a fixed snapshot number ;
preserve;

gen gini = . ;
forvalues y = 1987(1)2009 {;
	quietly inequal7 hhinc if year == `y', returnscalars ;
	replace gini = r(gini) if year == `y' ;
};

* gini is constant within a year, so the mean simply reduces the data to one row per year;
collapse (mean) gini, by(year) ;
format * %23.5f ;
outsheet using "`outputpath'\hhinc_LT_gini_yr.txt", replace ;

restore;




***** Decomposition of variance of earnings measure (Kopczuk, Saez and Song Figure 5) ****************;

* The same calculation is run for centered windows of 3, 5, 7 and 9 years ;
* For a window of w years the half-width is h = (w-1)/2 and the center year runs from 1987+h to ;
* 2009-h, which gives 1988-2008, 1989-2007, 1990-2006 and 1991-2005 respectively ;
* Results are accumulated one row per center year in KSS_3, KSS_5, KSS_7 and KSS_9 ;

foreach w of numlist 3 5 7 9 {;

	local h = (`w' - 1)/2;
	local first = 1987 + `h';
	local last = 2009 - `h';

	forvalues i = `first'(1)`last' {;

		* preserve dataset;
		preserve;

		* keep the span of w years centered on the current year;
		local j = `i' - `h';
		local k = `i' + `h';
		keep if (year >= `j' & year <= `k');

		* cut the sample to units that have lnhhinc in every year of the window ;
		* bysort is used so that this step does not depend on how the data happen to be sorted ;
		bysort issnp: egen numobs = count(lnhhinc);
		keep if numobs == `w';

		* (1) variance of lnhhinc within each year of the window, averaged over the window ;
		* this is the annual variance reported for the center year ;
		sort year;
		by year: egen sdlnhhinc = sd(lnhhinc);
		gen varlnhhinc = sdlnhhinc^2;
		egen meanvarlnhhinc_`w' = mean(varlnhhinc);

		* (2) mean of lnhhinc for each unit over the window, then the variance of those means ;
		* across the units observed in the center year ;
		sort issnp;
		by issnp: egen meanlnhhinc = mean(lnhhinc);
		keep if year == `i';
		egen sdmeanlnhhinc = sd(meanlnhhinc);
		gen varmeanlnhhinc_`w' = sdmeanlnhhinc^2;

		* (3) difference between lnhhinc in the center year and the window mean of the same unit, ;
		* and the variance of that difference ;
		gen lnhhincdif = lnhhinc - meanlnhhinc;
		egen sdlnhhincdif = sd(lnhhincdif);
		gen varlnhhincdif_`w' = sdlnhhincdif^2;

		* The three statistics are constant across rows, so one row carries the result;
		keep if _n == 1;
		keep year meanvarlnhhinc_`w' varmeanlnhhinc_`w' varlnhhincdif_`w';

		* The first center year starts the results file and later years are appended to it ;
		* The test uses the loop counter rather than the data so that an empty window can never ;
		* cause an append to a results file left over from an earlier run ;
		if `i' == `first' {;
			save KSS_`w', replace;
		};
		else {;
			append using KSS_`w';
			sort year;
			save KSS_`w', replace;
		};

		restore;
	};
};



***** Decomposition of variance of earnings measure (Kopczuk, Saez and Song Figure 5) -- But keep those even not continuously in sample ****************;
*clear ;
*use temp2 ;

* The same calculation is run for centered windows of 3, 5, 7 and 9 years ;
* For a window of w years the half-width is h = (w-1)/2 and the center year runs from 1987+h to ;
* 2009-h, which gives 1988-2008, 1989-2007, 1990-2006 and 1991-2005 respectively ;
* No balanced-sample restriction is applied here: every observation inside the window is used ;
* Results are accumulated one row per center year in KSS_nodrop_3, _5, _7 and _9 ;

foreach w of numlist 3 5 7 9 {;

	local h = (`w' - 1)/2;
	local first = 1987 + `h';
	local last = 2009 - `h';

	forvalues i = `first'(1)`last' {;

		* preserve dataset;
		preserve;

		* keep the span of w years centered on the current year;
		local j = `i' - `h';
		local k = `i' + `h';
		keep if (year >= `j' & year <= `k');

		* (1) variance of lnhhinc within each year of the window, averaged over all observations ;
		* in the window - this is the annual variance reported for the center year ;
		sort year;
		by year: egen sdlnhhinc = sd(lnhhinc);
		gen varlnhhinc = sdlnhhinc^2;
		egen meanvarlnhhinc_`w' = mean(varlnhhinc);

		* (2) mean of lnhhinc for each unit over the years it appears in the window, then the ;
		* variance of those means across the units observed in the center year ;
		sort issnp;
		by issnp: egen meanlnhhinc = mean(lnhhinc);
		keep if year == `i';
		egen sdmeanlnhhinc = sd(meanlnhhinc);
		gen varmeanlnhhinc_`w' = sdmeanlnhhinc^2;

		* (3) difference between lnhhinc in the center year and the window mean of the same unit, ;
		* and the variance of that difference ;
		gen lnhhincdif = lnhhinc - meanlnhhinc;
		egen sdlnhhincdif = sd(lnhhincdif);
		gen varlnhhincdif_`w' = sdlnhhincdif^2;

		* The three statistics are constant across rows, so one row carries the result;
		keep if _n == 1;
		keep year meanvarlnhhinc_`w' varmeanlnhhinc_`w' varlnhhincdif_`w';

		* The first center year starts the results file and later years are appended to it ;
		* The test uses the loop counter rather than the data so that an empty window can never ;
		* cause an append to a results file left over from an earlier run ;
		if `i' == `first' {;
			save KSS_nodrop_`w', replace;
		};
		else {;
			append using KSS_nodrop_`w';
			sort year;
			save KSS_nodrop_`w', replace;
		};

		restore;
	};
};


****** Gottschalk and Moffitt "BPEA" method, from JEP (Fall 2009) paper, on page 7. ****************;

* The same calculation is run for centered windows of 3, 5, 7 and 9 years ;
* For a window of w years the half-width is h = (w-1)/2 and the center year runs from 1987+h to ;
* 2009-h, which gives 1988-2008, 1989-2007, 1990-2006 and 1991-2005 respectively ;
* Results are accumulated one row per center year in GMBPEA_3, GMBPEA_5, GMBPEA_7 and GMBPEA_9 ;

foreach w of numlist 3 5 7 9 {;

	local h = (`w' - 1)/2;
	local first = 1987 + `h';
	local last = 2009 - `h';

	forvalues i = `first'(1)`last' {;

		* preserve dataset;
		preserve;

		* number of years in the window - always equal to the window length ;
		* it is only referenced by the disabled balanced-sample restriction below ;
		local T = `w';

		* keep the span of w years centered on the current year;
		local j = `i' - `h';
		local k = `i' + `h';
		keep if (year >= `j' & year <= `k');

		* cut the sample to people who are continuously in the sample (disabled);
		*by issnp: egen numobs = count(lnhhinc);
		*keep if numobs == `T';

		* count the years each unit has lnhhinc in the window and average that count over all ;
		* observations in the window.  bysort is used so that this step does not depend on how ;
		* the data happen to be sorted - the by statements that follow rely on this sort ;
		bysort issnp: egen numobs = count(lnhhinc);
		egen meanT = mean(numobs);

		* calculate individual level variance;
		by issnp: egen sdlnhhinc = sd(lnhhinc);
		gen indivvarlnhhinc = sdlnhhinc^2;

		* calculate individual level mean;
		by issnp: egen indivmeanlnhhinc = mean(lnhhinc);

		* calculate overall mean (disabled);
		*egen meanlnhhinc = mean(lnhhinc);

		* Keep only observations from the center year;
		keep if year == `i';

		* Calculate transitory variance as mean of individual level variances;
		egen transvar_`w' = mean(indivvarlnhhinc);

		* Calculate variance in mean earnings across individuals;
		egen sdmeanlnhhinc = sd(indivmeanlnhhinc);
		gen varmeanlnhhinc = sdmeanlnhhinc^2;

		* Calculate permanent variance as variance of mean earnings across individuals minus a ;
		* correction term.  The transitory variance is referenced by its full name so that this ;
		* line does not depend on variable abbreviation being switched on ;
		gen permvar_`w' = varmeanlnhhinc - (transvar_`w')/meanT;

		* Save total variance variable ;
		* NOTE(review): this stores the variance of individual means, not permvar plus transvar ;
		* confirm that this is the intended definition of total variance ;
		gen totvar_`w' = varmeanlnhhinc;

		* The statistics are constant across rows, so one row carries the result;
		keep if _n == 1;
		keep year transvar_`w' permvar_`w' totvar_`w';

		* The first center year starts the results file and later years are appended to it ;
		* The test uses the loop counter rather than the data so that an empty window can never ;
		* cause an append to a results file left over from an earlier run ;
		if `i' == `first' {;
			save GMBPEA_`w', replace;
		};
		else {;
			append using GMBPEA_`w';
			sort year;
			save GMBPEA_`w', replace;
		};

		restore;
	};
};

*** Display results for Kopczuk Saez Song decompositions (all windows) ***;

* Balanced-sample results: load each window file, write it to the output folder and list it in the log;
foreach w of numlist 3 5 7 9 {;
	use KSS_`w', clear;
	outsheet using "`outputpath'\hhinc_LT_kss`w'.txt", replace ;
	list;
};

* Same for the results that keep units not continuously in the sample;
foreach w of numlist 3 5 7 9 {;
	use KSS_nodrop_`w', clear;
	outsheet using "`outputpath'\hhinc_LT_kss`w'_nodrop.txt", replace ;
	list;
};

*** Display results for Gottschalk and Moffitt simple decompositions (all windows) ***;
foreach w of numlist 3 5 7 9 {;
	use GMBPEA_`w', clear;
	outsheet using "`outputpath'\hhinc_LT_gmbpea`w'.txt", replace ;
	list;
};




**************** USING RESIDUALS **************************;

* Read in residual dataset;
use "`datapath'\FinalSampleResids_hhinc_LT.dta", clear;


***** Decomposition of variance of earnings measure (Kopczuk, Saez and Song Figure 5) ****************;

* The same calculation is run on the variable res for centered windows of 3, 5, 7 and 9 years ;
* For a window of w years the half-width is h = (w-1)/2 and the center year runs from 1987+h to ;
* 2009-h, which gives 1988-2008, 1989-2007, 1990-2006 and 1991-2005 respectively ;
* Results are accumulated one row per center year in KSSres_3, KSSres_5, KSSres_7 and KSSres_9 ;

foreach w of numlist 3 5 7 9 {;

	local h = (`w' - 1)/2;
	local first = 1987 + `h';
	local last = 2009 - `h';

	forvalues i = `first'(1)`last' {;

		* preserve dataset;
		preserve;

		* keep the span of w years centered on the current year;
		local j = `i' - `h';
		local k = `i' + `h';
		keep if (year >= `j' & year <= `k');

		* cut the sample to units that have res in every year of the window ;
		* bysort is used so that this step does not depend on how the file just read is sorted ;
		bysort issnp: egen numobs = count(res);
		keep if numobs == `w';

		* (1) variance of res within each year of the window, averaged over the window ;
		* this is the annual variance reported for the center year ;
		sort year;
		by year: egen sdres = sd(res);
		gen varres = sdres^2;
		egen meanvarres_`w' = mean(varres);

		* (2) mean of res for each unit over the window, then the variance of those means ;
		* across the units observed in the center year ;
		sort issnp;
		by issnp: egen meanres = mean(res);
		keep if year == `i';
		egen sdmeanres = sd(meanres);
		gen varmeanres_`w' = sdmeanres^2;

		* (3) difference between res in the center year and the window mean of the same unit, ;
		* and the variance of that difference ;
		gen resdif = res - meanres;
		egen sdresdif = sd(resdif);
		gen varresdif_`w' = sdresdif^2;

		* The three statistics are constant across rows, so one row carries the result;
		keep if _n == 1;
		keep year meanvarres_`w' varmeanres_`w' varresdif_`w';

		* The first center year starts the results file and later years are appended to it ;
		* The test uses the loop counter rather than the data so that an empty window can never ;
		* cause an append to a results file left over from an earlier run ;
		if `i' == `first' {;
			save KSSres_`w', replace;
		};
		else {;
			append using KSSres_`w';
			sort year;
			save KSSres_`w', replace;
		};

		restore;
	};
};



* Read in residual dataset;
use "`datapath'\FinalSampleResids_hhinc_LT.dta", clear;


***** Decomposition of variance of earnings measure (Kopczuk, Saez and Song Figure 5) -- But keep individuals even if not continuously in sample ****************;

* Three Years;

forvalues i = 1988(1)2008 {;

	* preserve dataset;
	preserve;

	* keep three year span;
	local j = `i' - 1;
	local k = `i' + 1;
	keep if (year >= `j' & year <= `k');

	* (1) variance of res within each year of the window, averaged over all observations in the ;
	* window - this is the annual variance reported for the center year ;
	sort year;
	by year: egen sdres = sd(res);
	gen varres = sdres^2;
	egen meanvarres_3 = mean(varres);

	* (2) mean of res for each unit over the years it appears in the window, then the variance ;
	* of those means across the units observed in the center year ;
	sort issnp;
	by issnp: egen meanres = mean(res);
	keep if year == `i';
	egen sdmeanres = sd(meanres);
	gen varmeanres_3 = sdmeanres^2;

	* (3) difference between res in the center year and the mean of the same unit over the ;
	* three years centered on it, and the variance of that difference ;
	gen resdif = res - meanres;
	egen sdresdif = sd(resdif);
	gen varresdif_3 = sdresdif^2;

	* The three statistics are constant across rows, so one row carries the result;
	keep if _n == 1;
	keep year meanvarres_3 varmeanres_3 varresdif_3;

	* The first center year starts the results file and later years are appended to it ;
	* The test uses the loop counter rather than the data so that an empty window can never ;
	* cause an append to a results file left over from an earlier run ;
	if `i' == 1988 {;
		save KSSres_nodrop_3, replace;
	};
	else {;
		append using KSSres_nodrop_3;
		sort year;
		save KSSres_nodrop_3, replace;
	};

	restore;
};


*** Five Years ***;

* Five-year window (t-2 to t+2), keeping units however many of the window years
* they appear in.  One row per centre year t is accumulated in KSSres_nodrop_5.dta
* with
*   meanvarres_5 : observation-weighted mean of the yearly cross-sectional variance of res
*   varmeanres_5 : variance across units present in year t of their window mean of res
*   varresdif_5  : variance across those units of res in year t minus the window mean ;

forvalues i = 1989(1)2007 {;

	* work on a copy so the full panel is back in memory for the next centre year;
	preserve;

	* keep five year span;
	local j = `i' - 2;
	local k = `i' + 2;
	keep if (year >= `j' & year <= `k');

	* (1) variance of res within each year of the window, averaged over the window
	* rows - the "Annual Variance" for year t ;
	bysort year: egen sdres = sd(res);
	gen varres = sdres^2;
	egen meanvarres_5 = mean(varres);

	* (2) mean of res per unit over the window, then its variance across units in year t;
	bysort issnp: egen meanres = mean(res);
	keep if year == `i';
	egen sdmeanres = sd(meanres);
	gen varmeanres_5 = sdmeanres^2;

	* (3) difference between res in t and the average over the five years centered
	* around t, and its variance across units ;
	gen resdif = res - meanres;
	egen sdresdif = sd(resdif);
	gen varresdif_5 = sdresdif^2;

	* every remaining row carries the same three statistics, so keep a single one;
	keep if _n == 1;
	keep year meanvarres_5 varmeanres_5 varresdif_5;

	* Start the results file at the first centre year and append afterwards.  The
	* loop counter is tested instead of the variable year because a programming if
	* reads only year[1], which is missing when no row survives and would then
	* select the append branch and pick up a results file left by an earlier run ;
	if `i' == 1989 {;
		save KSSres_nodrop_5, replace;
	};
	else {;
		append using KSSres_nodrop_5;
		sort year;
		save KSSres_nodrop_5, replace;
	};

	restore;
};




*** Seven Years ***;

* Seven-year window (t-3 to t+3), keeping units however many of the window years
* they appear in.  One row per centre year t is accumulated in KSSres_nodrop_7.dta
* with
*   meanvarres_7 : observation-weighted mean of the yearly cross-sectional variance of res
*   varmeanres_7 : variance across units present in year t of their window mean of res
*   varresdif_7  : variance across those units of res in year t minus the window mean ;

forvalues i = 1990(1)2006 {;

	* work on a copy so the full panel is back in memory for the next centre year;
	preserve;

	* keep seven year span;
	local j = `i' - 3;
	local k = `i' + 3;
	keep if (year >= `j' & year <= `k');

	* (1) variance of res within each year of the window, averaged over the window
	* rows - the "Annual Variance" for year t ;
	bysort year: egen sdres = sd(res);
	gen varres = sdres^2;
	egen meanvarres_7 = mean(varres);

	* (2) mean of res per unit over the window, then its variance across units in year t;
	bysort issnp: egen meanres = mean(res);
	keep if year == `i';
	egen sdmeanres = sd(meanres);
	gen varmeanres_7 = sdmeanres^2;

	* (3) difference between res in t and the average over the seven years centered
	* around t, and its variance across units ;
	gen resdif = res - meanres;
	egen sdresdif = sd(resdif);
	gen varresdif_7 = sdresdif^2;

	* every remaining row carries the same three statistics, so keep a single one;
	keep if _n == 1;
	keep year meanvarres_7 varmeanres_7 varresdif_7;

	* Start the results file at the first centre year and append afterwards.  The
	* loop counter is tested instead of the variable year because a programming if
	* reads only year[1], which is missing when no row survives and would then
	* select the append branch and pick up a results file left by an earlier run ;
	if `i' == 1990 {;
		save KSSres_nodrop_7, replace;
	};
	else {;
		append using KSSres_nodrop_7;
		sort year;
		save KSSres_nodrop_7, replace;
	};

	restore;
};



*** Nine Years ***;

* Nine-year window (t-4 to t+4), keeping units however many of the window years
* they appear in.  One row per centre year t is accumulated in KSSres_nodrop_9.dta
* with
*   meanvarres_9 : observation-weighted mean of the yearly cross-sectional variance of res
*   varmeanres_9 : variance across units present in year t of their window mean of res
*   varresdif_9  : variance across those units of res in year t minus the window mean ;

forvalues i = 1991(1)2005 {;

	* work on a copy so the full panel is back in memory for the next centre year;
	preserve;

	* keep nine year span;
	local j = `i' - 4;
	local k = `i' + 4;
	keep if (year >= `j' & year <= `k');

	* (1) variance of res within each year of the window, averaged over the window
	* rows - the "Annual Variance" for year t ;
	bysort year: egen sdres = sd(res);
	gen varres = sdres^2;
	egen meanvarres_9 = mean(varres);

	* (2) mean of res per unit over the window, then its variance across units in year t;
	bysort issnp: egen meanres = mean(res);
	keep if year == `i';
	egen sdmeanres = sd(meanres);
	gen varmeanres_9 = sdmeanres^2;

	* (3) difference between res in t and the average over the nine years centered
	* around t, and its variance across units ;
	gen resdif = res - meanres;
	egen sdresdif = sd(resdif);
	gen varresdif_9 = sdresdif^2;

	* every remaining row carries the same three statistics, so keep a single one;
	keep if _n == 1;
	keep year meanvarres_9 varmeanres_9 varresdif_9;

	* Start the results file at the first centre year and append afterwards.  The
	* loop counter is tested instead of the variable year because a programming if
	* reads only year[1], which is missing when no row survives and would then
	* select the append branch and pick up a results file left by an earlier run ;
	if `i' == 1991 {;
		save KSSres_nodrop_9, replace;
	};
	else {;
		append using KSSres_nodrop_9;
		sort year;
		save KSSres_nodrop_9, replace;
	};

	restore;
};




****** Gottschalk and Moffitt "BPEA" method, from JEP (Fall 2009) paper, on page 7. ****************;

*** Three Year Span ***;

* For each centre year t, on the window t-1 to t+1, one row is accumulated in
* GMBPEAres_3.dta with
*   transvar_3 : mean across units present in year t of the within-unit variance of res
*   permvar_3  : variance across those units of the within-unit mean of res,
*                minus transvar_3 divided by the average number of years observed
*   totvar_3   : the variance of the within-unit means before that correction
* NOTE(review): totvar_3 is not permvar_3 + transvar_3 - confirm this is the
* intended definition of total variance ;

forvalues i = 1988(1)2008 {;

	* work on a copy so the full panel is back in memory for the next centre year;
	preserve;

	* nominal window length - only used by the optional balanced-sample filter below;
	local T = 3;

	* keep three year span;
	local j = `i' - 1;
	local k = `i' + 1;
	keep if (year >= `j' & year <= `k');

	* years with a non-missing res per unit.  bysort is used so the grouping does
	* not depend on whatever sort order the data arrived in ;
	bysort issnp: egen numobs = count(res);

	* to cut the sample to units present in every window year, enable the next line;
	*keep if numobs == `T';

	* average number of observed years, taken over all rows in the window;
	egen meanT = mean(numobs);

	* calculate individual level variance;
	bysort issnp: egen sdres = sd(res);
	gen indivvarres = sdres^2;

	* calculate individual level mean;
	bysort issnp: egen indivmeanres = mean(res);

	* Keep only observations from year t;
	keep if year == `i';

	* Calculate transitory variance as mean of individual level variances;
	egen transvar_3 = mean(indivvarres);

	* Calculate variance in mean earnings across individuals;
	egen sdmeanres = sd(indivmeanres);
	gen varmeanres = sdmeanres^2;

	* Permanent variance is the variance of the means minus a correction term.  The
	* full variable name is spelled out so the line does not rely on abbreviation ;
	gen permvar_3 = varmeanres - transvar_3/meanT;

	* Save total variance variable;
	gen totvar_3 = varmeanres;

	* every remaining row carries the same statistics, so keep a single one;
	keep if _n == 1;
	keep year transvar_3 permvar_3 totvar_3;

	* Start the results file at the first centre year and append afterwards.  The
	* loop counter is tested instead of the variable year because a programming if
	* reads only year[1], which is missing when no row survives and would then
	* select the append branch and pick up a results file left by an earlier run ;
	if `i' == 1988 {;
		save GMBPEAres_3, replace;
	};
	else {;
		append using GMBPEAres_3;
		sort year;
		save GMBPEAres_3, replace;
	};

	restore;
};

*** Five Year Span ***;

* For each centre year t, on the window t-2 to t+2, one row is accumulated in
* GMBPEAres_5.dta with
*   transvar_5 : mean across units present in year t of the within-unit variance of res
*   permvar_5  : variance across those units of the within-unit mean of res,
*                minus transvar_5 divided by the average number of years observed
*   totvar_5   : the variance of the within-unit means before that correction
* NOTE(review): totvar_5 is not permvar_5 + transvar_5 - confirm this is the
* intended definition of total variance ;

forvalues i = 1989(1)2007 {;

	* work on a copy so the full panel is back in memory for the next centre year;
	preserve;

	* nominal window length - only used by the optional balanced-sample filter below;
	local T = 5;

	* keep five year span;
	local j = `i' - 2;
	local k = `i' + 2;
	keep if (year >= `j' & year <= `k');

	* years with a non-missing res per unit.  bysort is used so the grouping does
	* not depend on whatever sort order the data arrived in ;
	bysort issnp: egen numobs = count(res);

	* to cut the sample to units present in every window year, enable the next line;
	*keep if numobs == `T';

	* average number of observed years, taken over all rows in the window;
	egen meanT = mean(numobs);

	* calculate individual level variance;
	bysort issnp: egen sdres = sd(res);
	gen indivvarres = sdres^2;

	* calculate individual level mean;
	bysort issnp: egen indivmeanres = mean(res);

	* Keep only observations from year t;
	keep if year == `i';

	* Calculate transitory variance as mean of individual level variances;
	egen transvar_5 = mean(indivvarres);

	* Calculate variance in mean earnings across individuals;
	egen sdmeanres = sd(indivmeanres);
	gen varmeanres = sdmeanres^2;

	* Permanent variance is the variance of the means minus a correction term.  The
	* full variable name is spelled out so the line does not rely on abbreviation ;
	gen permvar_5 = varmeanres - transvar_5/meanT;

	* Save total variance variable;
	gen totvar_5 = varmeanres;

	* every remaining row carries the same statistics, so keep a single one;
	keep if _n == 1;
	keep year transvar_5 permvar_5 totvar_5;

	* Start the results file at the first centre year and append afterwards.  The
	* loop counter is tested instead of the variable year because a programming if
	* reads only year[1], which is missing when no row survives and would then
	* select the append branch and pick up a results file left by an earlier run ;
	if `i' == 1989 {;
		save GMBPEAres_5, replace;
	};
	else {;
		append using GMBPEAres_5;
		sort year;
		save GMBPEAres_5, replace;
	};

	restore;
};


*** Seven Year Span ***;

* For each centre year t, on the window t-3 to t+3, one row is accumulated in
* GMBPEAres_7.dta with
*   transvar_7 : mean across units present in year t of the within-unit variance of res
*   permvar_7  : variance across those units of the within-unit mean of res,
*                minus transvar_7 divided by the average number of years observed
*   totvar_7   : the variance of the within-unit means before that correction
* NOTE(review): totvar_7 is not permvar_7 + transvar_7 - confirm this is the
* intended definition of total variance.
* NOTE(review): the loop stops at 2003 although a seven-year window fits inside the
* 1987-2009 panel through 2006 - confirm the shorter range is intended ;

forvalues i = 1990(1)2003 {;

	* work on a copy so the full panel is back in memory for the next centre year;
	preserve;

	* nominal window length - only used by the optional balanced-sample filter
	* below.  It was set to 9 here, which did not match the seven-year window ;
	local T = 7;

	* keep seven year span;
	local j = `i' - 3;
	local k = `i' + 3;
	keep if (year >= `j' & year <= `k');

	* years with a non-missing res per unit.  bysort is used so the grouping does
	* not depend on whatever sort order the data arrived in ;
	bysort issnp: egen numobs = count(res);

	* to cut the sample to units present in every window year, enable the next line;
	*keep if numobs == `T';

	* average number of observed years, taken over all rows in the window;
	egen meanT = mean(numobs);

	* calculate individual level variance;
	bysort issnp: egen sdres = sd(res);
	gen indivvarres = sdres^2;

	* calculate individual level mean;
	bysort issnp: egen indivmeanres = mean(res);

	* Keep only observations from year t;
	keep if year == `i';

	* Calculate transitory variance as mean of individual level variances;
	egen transvar_7 = mean(indivvarres);

	* Calculate variance in mean earnings across individuals;
	egen sdmeanres = sd(indivmeanres);
	gen varmeanres = sdmeanres^2;

	* Permanent variance is the variance of the means minus a correction term.  The
	* full variable name is spelled out so the line does not rely on abbreviation ;
	gen permvar_7 = varmeanres - transvar_7/meanT;

	* Save total variance variable;
	gen totvar_7 = varmeanres;

	* every remaining row carries the same statistics, so keep a single one;
	keep if _n == 1;
	keep year transvar_7 permvar_7 totvar_7;

	* Start the results file at the first centre year and append afterwards.  The
	* loop counter is tested instead of the variable year because a programming if
	* reads only year[1], which is missing when no row survives and would then
	* select the append branch and pick up a results file left by an earlier run ;
	if `i' == 1990 {;
		save GMBPEAres_7, replace;
	};
	else {;
		append using GMBPEAres_7;
		sort year;
		save GMBPEAres_7, replace;
	};

	restore;
};



*** Nine Year Span ***;

* For each centre year t, on the window t-4 to t+4, one row is accumulated in
* GMBPEAres_9.dta with
*   transvar_9 : mean across units present in year t of the within-unit variance of res
*   permvar_9  : variance across those units of the within-unit mean of res,
*                minus transvar_9 divided by the average number of years observed
*   totvar_9   : the variance of the within-unit means before that correction
* NOTE(review): totvar_9 is not permvar_9 + transvar_9 - confirm this is the
* intended definition of total variance.
* NOTE(review): the loop stops at 2002 although a nine-year window fits inside the
* 1987-2009 panel through 2005 - confirm the shorter range is intended ;

forvalues i = 1991(1)2002 {;

	* work on a copy so the full panel is back in memory for the next centre year;
	preserve;

	* nominal window length - only used by the optional balanced-sample filter below;
	local T = 9;

	* keep nine year span;
	local j = `i' - 4;
	local k = `i' + 4;
	keep if (year >= `j' & year <= `k');

	* years with a non-missing res per unit.  bysort is used so the grouping does
	* not depend on whatever sort order the data arrived in ;
	bysort issnp: egen numobs = count(res);

	* to cut the sample to units present in every window year, enable the next line;
	*keep if numobs == `T';

	* average number of observed years, taken over all rows in the window;
	egen meanT = mean(numobs);

	* calculate individual level variance;
	bysort issnp: egen sdres = sd(res);
	gen indivvarres = sdres^2;

	* calculate individual level mean;
	bysort issnp: egen indivmeanres = mean(res);

	* Keep only observations from year t;
	keep if year == `i';

	* Calculate transitory variance as mean of individual level variances;
	egen transvar_9 = mean(indivvarres);

	* Calculate variance in mean earnings across individuals;
	egen sdmeanres = sd(indivmeanres);
	gen varmeanres = sdmeanres^2;

	* Permanent variance is the variance of the means minus a correction term.  The
	* full variable name is spelled out so the line does not rely on abbreviation ;
	gen permvar_9 = varmeanres - transvar_9/meanT;

	* Save total variance variable;
	gen totvar_9 = varmeanres;

	* every remaining row carries the same statistics, so keep a single one;
	keep if _n == 1;
	keep year transvar_9 permvar_9 totvar_9;

	* Start the results file at the first centre year and append afterwards.  The
	* loop counter is tested instead of the variable year because a programming if
	* reads only year[1], which is missing when no row survives and would then
	* select the append branch and pick up a results file left by an earlier run ;
	if `i' == 1991 {;
		save GMBPEAres_9, replace;
	};
	else {;
		append using GMBPEAres_9;
		sort year;
		save GMBPEAres_9, replace;
	};

	restore;
};

*** Display results for Kopczuk Saez Song decompositions (all windows) ***;

* For every window length, load the accumulated results, give all variables a
* fixed five-decimal display format, export them as text to the output folder and
* list them in the log.  The first loop covers the continuously-present samples,
* the second the samples that keep every unit.  The nine-year keep-everyone
* results were previously exported but not listed - they are now listed like
* the other seven ;

foreach w in 3 5 7 9 {;
	use KSSres_`w', clear;
	format * %23.5f ;
	outsheet using "`outputpath'\hhinc_LT_KSSres`w'.txt", replace ;
	list;
};

foreach w in 3 5 7 9 {;
	use KSSres_nodrop_`w', clear;
	format * %23.5f ;
	outsheet using "`outputpath'\hhinc_LT_KSSres`w'_nodrop.txt", replace ;
	list;
};


*** Display results for Gottschalk and Moffit simple decompositions (all windows) ***;

* For every window length, load the accumulated results, give all variables a
* fixed five-decimal display format, export them as text to the output folder and
* list them in the log ;

foreach span in 3 5 7 9 {;
	use GMBPEAres_`span', clear;
	format * %23.5f ;
	outsheet using "`outputpath'\hhinc_LT_GMBPEAres`span'.txt", replace ;
	list;
};


****** Evolution of descriptive measure of income inequality/instability (like in Shin and Solon Fig 1) ****************;

* Evolution of st. dev. of age adjusted year to year changes in log earnings;
* Note - estimation program must be run before this program to calculate residuals;

* Read in residual dataset and declare it a panel so lag operators can be used;
use "`datapath'\FinalSampleResids_hhinc_LT.dta", clear;
tsset issnp year;

* The two-year difference is handled first and the one-year difference second.
* For each one the loop
*   - creates resdif2 or resdif1 as res minus its lag
*   - tabulates the standard deviation of that difference by year
*   - saves the full panel to temp2, collapses to one standard deviation per year
*     and exports it as text
*   - reloads temp2 so the panel and the new difference are back in memory ;

foreach gap in 2 1 {;

	generate resdif`gap' = res - l`gap'.res;

	table year, contents(sd resdif`gap');

	save temp2, replace ;
	collapse (sd) resdif`gap', by(year) ;
	outsheet using "`outputpath'\hhinc_LT_resdif`gap'_yr.txt", replace ;
	clear ;
	use temp2 ;
};


****** Gottschalk and Moffitt Approximate Nonparametric Method ****************;

/* Outline of this section
   1. reshape the residual panel to one row per unit with one res and one
      age-group column per year
   2. for every year t, age group a and lag j, compute the covariance between res
      in t and res in t-j among units in age group a in year t (j = 0 gives the
      variance) and record it with its number of observations
   3. regress the log covariances at lags of 6 or more on year terms and an
      age-group specific quadratic in the lag
   4. predict at lag 0 to obtain the permanent variance and take the transitory
      variance as the yearly total variance minus that prediction */

use  issnp agem year res using  "`datapath'\FinalSampleResids_hhinc_LT.dta", clear;   

* create age group variable - 1 for agem under 30, one step up every five years, 7 for agem of 55 or more;
gen agemgroup = 1+ (agem >= 30) + (agem >= 35) + (agem >= 40) + (agem >= 45) + (agem >= 50) + (agem >= 55);
drop agem;

reshape wide res agemgroup , i(issnp) j(year);

/* Calculate yearly variance of residuals */
forvalues t = 1987(1)2009 {;
	egen sdres`t' = sd(res`t');
	gen varres`t' = sdres`t'^2;
};


/* Initialize counter for output */
local n = 1;

/* Initialize output variables.  Results are written into rows 1, 2, 3, ... of the
   wide dataset, one row per (t, a, j) cell.  There are 7 x 276 = 1932 cells, and a
   cell whose row number exceeds the number of rows is silently not recorded.
   Rows never written keep a weight of 0 and are dropped further down. */
gen yeart = 0;
gen yeartmj = 0;
gen lag = 0;
gen agemgroupt = 0;
gen covttmj = 0;
gen wgtttmj = 0;



/* Loop over all years of panel */
forvalues t = 1987(1)2009 {;
	/* Loop over age groups 1 to 7 as defined above */
	forvalues a = 1(1)7 {;

		/* Loop over all possible lags */
		local JJ = `t' - 1987;      /* j loops bet. 0 & maximum lagged year */
		forvalues j = 0(1)`JJ' {;    /* NOTE: start at zero to compute variance too */

			/* Lagged year */
			local tmj = `t'-`j';

			/* Covariance for this cell.  capture keeps the loop running when
			   correlate cannot be computed.  Any non-zero return code is treated
			   as a failed cell, not only code 111, because an empty or too-small
			   cell ends with a different code.  Testing for 111 alone let such
			   cells fall through to the success branch, where r() does not hold
			   results for this cell and a missing weight would pass the
			   weight > 0 filter below. */
			capture correlate  res`t'  res`tmj' if agemgroup`t' == `a', cov;

			if _rc != 0 {;
				scalar cellcov = .;
				scalar cellwgt = 0;
			};
			else {;
				scalar cellcov = r(cov_12);
				scalar cellwgt = r(N);
			};

			/* Output result to observation n */
			qui replace yeart = `t' if _n == `n';
			qui replace yeartmj = `tmj' if _n == `n';
			qui replace lag = `j' if _n == `n';
			qui replace agemgroupt = `a' if _n == `n';
			qui replace covttmj = scalar(cellcov) if _n == `n';
			qui replace wgtttmj = scalar(cellwgt) if _n == `n';

			/* Increment counter for output */
			local n = `n' + 1;
		};
	};
};

scalar drop cellcov cellwgt;

/* Assign yearly total variance by yeart */ 
gen varres = 0;
forvalues t = 1987(1)2009 {;
	replace varres = varres`t' if yeart == `t';
};

/* Keep only covariance data and tabulations of yearly variances*/
keep yeart yeartmj lag agemgroupt covttmj wgtttmj varres;

/* Keep only covariances where a covariance could be calculated*/
keep if wgtttmj > 0;

/* Regress log covariances against year dummies and a second order polynomial in  tau (lag) interacted with a (age 
group).  Non-positive covariances have a missing log and drop out of the regression. */
gen logcovttmj = ln(covttmj);

forvalues a = 1(1)7 {;
	qui gen lag_`a' = lag*(agemgroupt == `a');
};

forvalues a = 1(1)7 {;
	qui gen lagsq_`a' = lag^2*(agemgroupt == `a');
};

/* Year "dummy" equals (yeart = t) + (yeartmj = t) */
/* Note- when lag==0 the year term equals 2 - this has effect of exp(predicted value) yielding the square of alphat when lag == 0*/
forvalues t = 1987(1)2009 {;
	qui gen year_`t' = (yeart == `t') + (yeartmj == `t');
};

/* Regress log covariances against year dummies and f(a,t), using only lags of 6 or more */
reg logcovttmj year_1987-year_2009 lag_* lagsq_* if lag >= 6, noconstant;

/* predict age-specific permanent variance when lag=0.  The lag 0 rows were not in
   the estimation sample, so this is an out-of-sample prediction. */
predict logpermvar;
keep if lag == 0;
gen permvar = exp(logpermvar);

/* calculate implied transitory variance */
gen transvar = varres - permvar;

/* Print average of transitory variance over all ages */
table yeart, c(m transvar m permvar m varres);
 
/* Export the yearly averages as text, then put the lag 0 rows back in memory */
save temp2, replace ;
collapse (mean) transvar permvar varres, by(yeart) ;
outsheet using "`outputpath'\hhinc_LT_vars_yr.txt", replace ;
clear ;
use temp2 ;

****** Gottschalk and Moffitt Approximate Nonparametric Method ****************;
****** using yearly covariances (not 5 year bands)   ****************;

use "`outputpath'\CovsData_hhinc_LT_CAL.dta", clear;

/* The input is indexed by a base year (yrvar) and a lead (jvar).  Relabel each
   covariance between t and t+j as the covariance between the later year and the
   year j before it.
   NOTE(review): hpjvar is copied into PEt unchanged and is presumably the age-type
   measure in the later year - confirm against the program that writes the file. */

generate yeart = yrvar + jvar;
generate yeartmj = yrvar;
generate lag = jvar;
generate PEt = hpjvar;
generate covttmj = covvar;
generate wgtttmj = wgtvar;

/* Drop everything except the relabelled covariance data */
keep yeart yeartmj lag PEt covttmj wgtttmj;

/* Keep only cells where a covariance could be calculated (positive weight).  The
   restriction on the lag is applied in the regression below, not here. */
keep if wgtttmj > 0;

/* Dependent variable and the second order polynomial terms in lag and PEt */
generate logcovttmj = ln(covttmj);

generate lagPEt = lag*PEt;
generate lagsq = lag^2;
generate PEtsq = PEt^2;


/* Year term equals (yeart = t) + (yeartmj = t).  At lag 0 it equals 2, so the
   exponentiated prediction yields the square of alphat. */
forvalues yr = 1987/2009 {;
	quietly generate year_`yr' = (yeart == `yr') + (yeartmj == `yr');
};

/* Regress log covariances on the year terms and the polynomial, using only lags of 6 or more */
local rhs year_1987-year_2009 PEt PEtsq lagPEt lag lagsq;
regress logcovttmj `rhs' if lag >= 6, noconstant;

/* Predict for every row, then keep lag 0 where the prediction is the log permanent variance */
predict logpermvar;
keep if lag == 0;
generate permvar = exp(logpermvar);

/* At lag 0 covttmj is the variance itself, so the remainder is the implied transitory variance */
generate transvar = covttmj - permvar;

/* Print yearly averages of the three variances */
table yeart, contents(mean transvar mean permvar mean covttmj);

/* Export the yearly averages as text, then put the lag 0 rows back in memory */
save temp2, replace ;
collapse (mean) transvar permvar covttmj, by(yeart) ;
outsheet using "`outputpath'\hhinc_LT_vars2_yr.txt", replace ;
clear ;
use temp2 ;

/* Housekeeping: remove any stored snapshots and close the log opened at the top of the program */
snapshot erase _all;

log close;
