/********************************************************
* SEPT 27, 2011
* Kawano, Ramnath, Tong
* UNCERTAINTY: ADDING ADDITIONAL YEARS TO THE PANEL
* USES ZIPPED INSOLE DATA 
* NOTES: 2009 DATA FROM G DRIVE DOES NOT UNZIP PROPERLY (updated 7/19/2011 to include 2009 data)
* COPY AND PASTE ZIP08 INTO C DRIVE, UNZIP, AND REZIP MANUALLY TO RUN DO FILE
* RUN OFF OF C DRIVE

*FOR 1987:
*RETIREMENT INCOME NOT SEPARATED FOR IRAS;
*FOR 1987-1989;
*NO SELF EMPLOYMENT TAX DEDUCTION (UNDER ADJUSTMENTS)
*************************
*TO DO:
1. CHECK RECID (FIGURE OUT WHICH TO PULL: S001, REC_ID, RECID, RETID)
*********************************************************/

#delimit ;

/* Session setup: close any log left open by a previous run, empty memory,
   then apply the memory / matrix / pager settings used by the rest of the script. */
capture log close;
clear;
clear matrix;
set memory 4000m;
set matsize 800;
set more off;

/* Work from the local Insole folder (zip archives and the data subfolder are
   resolved relative to it) and write the text log to the panel folder. */
cd "C:\Research\Insole";
log using "D:\Panel8706\insolemerge.txt", text replace;


/* The record-id variable changes name over the sample period, so each year
   gets its own global varYY listing the year-specific variables to pull:
     retid   1987-1994
     rec_id  1995-1996 and 2003-2009
     s001    1997-2000
     recid   2001-2002 */

/* 1987 pulls the record id only */
global var87 retid;

/* 1988-1989 additionally pull e130 and e140 */
foreach yy in 88 89 {;
	global var`yy' retid e130 e140;
};

foreach yy in 90 91 92 93 94 {;
	global var`yy' retid;
};

foreach yy in 95 96 03 04 05 06 08 09 {;
	global var`yy' rec_id;
};

foreach yy in 97 98 99 00 {;
	global var`yy' s001;
};

foreach yy in 01 02 {;
	global var`yy' recid;
};

/* 2007 additionally pulls stimind */
global var07 rec_id stimind;

/* Common pull list for the 1987-1989 files */
global allvar87_89
	s006 s002 s003 flpdyr flpdmo mars
	e10 e20 e30 e60 e90 e100 e120 e200 e210
	e920 e1070 e710 e1060 e1190 e1210 e1220 e1200
	e480 e1090 e1100 e460 e40 e70 e80 e150 e170
	e230 e250 e260 e265 e290 e445 e450 e580
	e1940 e1950 e320 e325
	pprep pstate reject xocah xtot dsi
	scha schb schc schcf schd sche schf ;

/* 1990, 1991: sdobyr, e00650 (may not exist), e11070, occpri, occsec, ral,
   dirdep, stimind do not exist */
/* Disabled renames kept for reference:
   rename e00650 qualdiv;
   rename e11070 addctc; */

/* Common pull list for the 1990-2009 files (variables present in every file) */
global allvar
	s006 s002 s003 dobyr flpdyr flpdmo mars e00200
	e00300 e00400 e00600 e00700 e00800 e00900
	e01000 e01200 e01300 e01400 e01700
	e02000 e02100 e02300 e02500 e02600 e02650 e02900
	e03260 e00100 e04450 e04500 e04600 e04800
	e05800 e07100 e09200 e10600 e10700
	e11900 e12100 e12200 e12000 e11000 e10900
	e19400 e19500 e03150
	pprep pstate reject xocah xtot dsi
	scha schb schc schcf schd sche schf ;
 
 /* 1987-1989 data.
    For each year: unzip the archive, load the common 1987-1989 pull list plus
    the year-specific variables, keep the accepted current-year records that
    fall in the sample, rename the e-codes to descriptive names, and overwrite
    the unzipped file with the reduced extract.
    The three years share one loop so the filters and the rename map cannot
    drift apart between years. */
forvalues i=7(1)9{;

	clear;
	unzipfile zip8`i', replace;

	/* var8X is the year-specific pull list: the record id, plus e130 e140
	   for 1988 and 1989 */
	foreach j in var8`i' {;
	use $allvar87_89 $`j' using data\insole8`i';
	};

	gen year=198`i';
	/* flpdyr is presumably stored as a two-digit year in these files - TODO confirm */
	replace flpdyr=flpdyr+1900;
	/* keep records whose flpdyr matches the file year and whose reject flag is 0 */
	keep if year==flpdyr & reject==0;

	/* sample selection: keep records whose s002 ends in 2520 or 2545 */
	gen cwhsi_1 = mod(s002, 10000);
	gen cwhsi_ind = (cwhsi_1 ==2520| cwhsi_1 == 2545); 
	keep if cwhsi_ind == 1;

	sort s002 year;

	/* the 1987 pull list does not include e130 / e140, so create them as
	   missing to let the renames below run unchanged for every year */
	if `i'==7 {;
		gen e130=.; 
		gen e140=.;
	};
	/* dobyr is not pulled for 1987-1989; create it as missing so the column
	   exists when these files are appended to the later years */
	gen dobyr=.;

	rename e130 iradist;
	rename e140 txiradist;
	rename e10 agi;
	rename e20 wage;
	rename e30 interest;
	rename e60 txdiv;
	rename e90 businc;
	rename e100 capgain;
	rename e200 scheinc;
	rename e120 othgain;
	rename e210 farminc;
	rename e920 liab;
	rename e1070 withld;
	rename e710 totcred;
	rename e1060 paymts; 
	rename e1190 balance;
	rename e1210 refund;
	rename e1220 penalty;
	rename e1200 futpmnts;
	rename e480 taxinc;
	rename e1090 estima;
	rename e1100 eic;
	rename e460 exempt;
	rename e40 taxexint;
	rename e70 taxref;
	rename e80 alimony;
	rename e150 totret;
	rename e170 txpension;
	rename e230 unempl;
	rename e250 socsec;
	rename e260 othinc;
	rename e265 totinc;
	rename e290 totadj;
	rename e445 stdded;
	rename e450 taxtable;
	rename e580 taxbfcred;
	rename e1940 hmi_fi;
	rename e1950 hmi_ind;
	/* iraded is the row sum of e320 and e325 (rowtotal treats missing as zero) */
	egen iraded = rowtotal(e320 e325);

	/* overwrites the freshly unzipped file with the reduced extract */
	save "data\insole8`i'", replace;
};


/* 1990-1999 data: one reduced extract per year, saved over the unzipped file */
forvalues i=0/9 {;

	clear;
	unzipfile zip9`i', replace;

	/* name of the global holding this year's extra variables (record id) */
	local idglobal var9`i';
	use $allvar $`idglobal' using data\insole9`i';

	gen year=199`i';

	/* 1900 is added to flpdyr for 1990-1996 only; presumably those files
	   store a two-digit year - TODO confirm */
	if `i'<7 {;
		replace flpdyr=flpdyr+1900;
	};

	/* accepted records (reject flag 0) whose flpdyr matches the file year */
	keep if reject==0 & flpdyr==year;

	/* sample selection: s002 ends in 2520 or 2545 */
	gen cwhsi_1 = mod(s002, 10000);
	gen cwhsi_ind = inlist(cwhsi_1, 2520, 2545);
	keep if cwhsi_ind == 1;

	sort s002 year;
	save "data\insole9`i'", replace;
};

/* 2000-2009 data: one reduced extract per year, saved over the unzipped file.
   No adjustment is applied to flpdyr here; it is compared to the four-digit
   file year directly. */
forvalues i=0/9 {;

	clear;
	unzipfile zip0`i', replace;

	/* name of the global holding this year's extra variables
	   (record id, plus stimind for 2007) */
	local idglobal var0`i';
	use $allvar $`idglobal' using data\insole0`i';

	gen year=200`i';

	/* accepted records (reject flag 0) whose flpdyr matches the file year */
	keep if reject==0 & flpdyr==year;

	/* sample selection: s002 ends in 2520 or 2545 */
	gen cwhsi_1 = mod(s002, 10000);
	gen cwhsi_ind = inlist(cwhsi_1, 2520, 2545);
	keep if cwhsi_ind == 1;

	sort s002 year;
	save "data\insole0`i'", replace;
};


/* MERGE: stack the yearly extracts into one file.
   The 1990-2009 extracts still carry raw e-codes, so they are stacked first
   and renamed in one pass; the 1987-1989 extracts were already renamed when
   they were built and are appended afterwards. */
clear;
use data\insole90;
foreach yy in 91 92 93 94 95 96 97 98 99
              00 01 02 03 04 05 06 07 08 09 {;
	append using data\insole`yy';
};

/* Disabled: harmonise the record id across years.
replace rec_id = s001 if year >= 1997 & year < 2001;
replace rec_id = recid if year >= 2001 & year <= 2002;
drop recid s001;
*/

/* RENAME VARIABLES: each list element is an "old new" pair handed to rename */
foreach pair in
	"e00100 agi"
	"e00200 wage"
	"e00300 interest"
	"e00600 txdiv"
	"e00900 businc"
	"e01000 capgain"
	"e02000 scheinc"
	"e01200 othgain"
	"e02100 farminc"
	"e09200 liab"
	"e10700 withld"
	"e07100 totcred"
	"e10600 paymts"
	"e11900 balance"
	"e12100 refund"
	"e12200 penalty"
	"e12000 futpmnts"
	"e04800 taxinc"
	"e10900 estima"
	"e11000 eic"
	"e04600 exempt"
	"e00400 taxexint"
	"e00700 taxref"
	"e00800 alimony"
	"e01300 iradist"
	"e01400 txiradist"
	"e01700 txpension"
	"e02300 unempl"
	"e02500 socsec"
	"e02600 othinc"
	"e02650 totinc"
	"e02900 totadj"
	"e03260 seded"
	"e04450 stdded"
	"e04500 taxtable"
	"e05800 taxbfcred"
	"e19400 hmi_fi"
	"e19500 hmi_ind"
	"e03150 iraded" {;
	rename `pair';
};

/* 1987-1989 extracts already use the descriptive names */
foreach yy in 87 88 89 {;
	append using data\insole`yy';
};


/* Reduce to the variables the output needs before sorting */
keep s002 year eic totinc;

/* issnp is a long copy of s002 and serves as the panel identifier */
gen long issnp = s002;
drop s002;

/* One record per issnp-year: order each group by descending totinc so the
   record with the largest totinc comes first, then keep only that first record.
   NOTE(review): records tied on issnp, year and totinc have no defined order
   after gsort, so which tied record survives may differ between runs - confirm
   whether that matters for eic. */
gsort issnp year -totinc;
duplicates drop issnp year, force;

sort issnp year;
save "D:\Panel8706\inequality_insole87_09.dta", replace;

capture log close;




