***********************************************************
* Labor Income Dynamics project
* 
***********************************************************
* This program reads in a raw PSID file
* which contains records from the 1971-2009 PSID surveys
*
* It creates variables and sample selection consistent with 
* what we did in the tax data
*
*************************************************************

# delimit ;
* From this point every command, including each star comment, ends at a semicolon;

* NOTE(review): legacy numeric form, presumably the same as set more off - confirm;
set more 1;
set logtype text;
clear all;

*PARAMETERS;
global RawDataFile /Users/jasondebacker/Econ/Research/IncomeInequality/PSID_data_file/MaleEarnings_PSID.dta;
global OutputDir /Users/jasondebacker/Econ/Research/IncomeInequality/PSID_data_file ;

* Close any log left open by an earlier interrupted run so that log using cannot abort;
capture log close ;
* Paths are quoted so a directory name containing blanks does not break the command;
log using "$OutputDir/PSID_maleearnings_data_setup.log", replace ;

* Globals below are set here for the project and only MinAge is read in this file;
global MinAge 25;
global MaxAge 60;
global MinExper 1; 
global MaxExper 100; *38;
global FirstYear 1971; *this is survey year;

* Work inside the output directory so the relative save and use calls land there;
cd "$OutputDir";
use "$RawDataFile", clear;
summ;

* Store Age under a lower-case name and shift it up by 24;
* NOTE(review): presumably the raw file codes the youngest sample age as 1 - confirm;
rename Age age ;
replace age = 24 + age ;

*generate potential experience as years past the minimum age, counted from 1;
generate PE = (age - $MinAge) + 1 ;


*deflate nominal variables;
/* Personal Consumption Expenditure Deflator.  Taken from
   Economic Report of the President, 2009, Appendix B,
   STATISTICAL TABLES RELATING TO INCOME, EMPLOYMENT, AND PRODUCTION
   TABLE B-7.Chain-type price indexes for gross domestic product, 1959-2003
   [Index numbers, 2000=100]
   2007-2009 are from Table B-7 of the 2012 Report.

   This citation is a block comment on purpose.  Under the semicolon
   delimiter a star comment runs until the next semicolon, and the last
   citation line used to lack one, so it swallowed the pce1967 definition.

   Every scalar is the index divided by 100.  The 2007-2009 entries are
   first multiplied by 111.581 (the 2005 value used below) and divided by 100.
   NOTE(review): presumably this rebases a 2005=100 series onto 2000=100 - confirm. */

scalar pce1967= 23.237/(100);
scalar pce1968= 24.151/(100);
scalar pce1969= 25.255/(100);

scalar pce1970= 26.448/(100);
scalar pce1971= 27.574/(100);
scalar pce1972= 28.528/(100);
scalar pce1973= 30.081/(100);
scalar pce1974= 33.191/(100);
scalar pce1975= 35.955/(100);
scalar pce1976= 37.948/(100);
scalar pce1977= 40.410/(100);
scalar pce1978= 43.248/(100);
scalar pce1979= 47.059/(100);

scalar pce1980= 52.078/(100);
scalar pce1981= 56.720/(100);
scalar pce1982= 59.859/(100);
scalar pce1983= 62.436/(100);
scalar pce1984= 64.795/(100);
scalar pce1985= 66.936/(100);
scalar pce1986= 68.569/(100);
scalar pce1987= 70.947/(100);
scalar pce1988= 73.755/(100);
scalar pce1989= 76.972/(100);

scalar pce1990= 80.498/(100);
scalar pce1991= 83.419/(100);
scalar pce1992= 85.824/(100);
scalar pce1993= 87.804/(100);
scalar pce1994= 89.654/(100);
scalar pce1995= 91.577/(100);
scalar pce1996= 93.547/(100);
scalar pce1997= 95.124/(100);
scalar pce1998= 95.978/(100);
scalar pce1999= 97.575/(100);

scalar pce2000= 100.000/(100);
scalar pce2001= 102.094/(100);
scalar pce2002= 103.548/(100);
scalar pce2003= 105.597/(100);
scalar pce2004= 108.392/(100);
scalar pce2005= 111.581/(100);
scalar pce2006= 114.675/(100);
scalar pce2007= ((105.499*111.581)/100)/(100);
scalar pce2008= ((108.943*111.581)/100)/(100);
scalar pce2009= ((109.169*111.581)/100)/(100);

* Assign the deflator that matches the value of year on each record;
* Records with year outside 1971-2009 keep the initial 0, and dividing by 0 yields missing;
gen pce = 0;
forvalues i = 1971(1)2009 {;
	* scalar() forces the scalar to be read even if a variable shares the name;
	replace pce = scalar(pce`i') if (year==`i');
};


** Average hourly wage deflator;
* Taken from http://data.bls.gov/PDQ/servlet/SurveyOutputServlet;
* [Index numbers, 2004=15.69];
* Every scalar is that year's value divided by the 2004 value, so wagedef2004 equals 1;

scalar wagedef1979= 6.34/(15.69);

scalar wagedef1980= 6.85/(15.69);
scalar wagedef1981= 7.44/(15.69);
scalar wagedef1982= 7.87/(15.69);
scalar wagedef1983= 8.2/(15.69);
scalar wagedef1984= 8.49/(15.69);
scalar wagedef1985= 8.74/(15.69);
scalar wagedef1986= 8.93/(15.69);
scalar wagedef1987= 9.14/(15.69);
scalar wagedef1988= 9.44/(15.69);
scalar wagedef1989= 9.80/(15.69);

scalar wagedef1990= 10.20/(15.69);
scalar wagedef1991= 10.52/(15.69);
scalar wagedef1992= 10.77/(15.69);
scalar wagedef1993= 11.05/(15.69);
scalar wagedef1994= 11.34/(15.69);
scalar wagedef1995= 11.65/(15.69);
scalar wagedef1996= 12.04/(15.69);
scalar wagedef1997= 12.51/(15.69);
scalar wagedef1998= 13.01/(15.69);
scalar wagedef1999= 13.49/(15.69);

scalar wagedef2000= 14.02/(15.69);
scalar wagedef2001= 14.54/(15.69);
scalar wagedef2002= 14.97/(15.69);
scalar wagedef2003= 15.37/(15.69);
scalar wagedef2004= 15.69/(15.69);
scalar wagedef2005= 16.13/(15.69);
scalar wagedef2006= 16.76/(15.69);  
scalar wagedef2007= 17.43/(15.69);
scalar wagedef2008= 18.08/(15.69);
scalar wagedef2009= 18.63/(15.69);

* Assign the deflator that matches the value of year on each record;
* Records with year outside 1979-2009 keep the initial 0, and dividing by 0 yields missing;
gen wagedef = 0;
forvalues i = 1979(1)2009 {;
	* scalar() forces the scalar to be read even if a variable shares the name;
	replace wagedef = scalar(wagedef`i') if (year==`i');
};

* Deflate nominal variables;
* Each earnings variable gets a wage-deflated copy under a wagedef prefix;
* and is then overwritten in place with its PCE-deflated value;
foreach earnvar in LabIncHD Wages2HD {;
	generate wagedef`earnvar' = `earnvar' / wagedef;
	replace `earnvar' = `earnvar' / pce;
};




/** Create some variables had in tax data **/
/******* Form other variables ****************/

* File years kept for the sample, defined once so the filter and the dummies agree;
local SampleFirstYear 1987;
local SampleLastYear 2009;

* Select on file year;
keep if year >= `SampleFirstYear' & year <= `SampleLastYear' ;

* create year dummies for summarizing;
* Any existing variable whose name starts with yr is removed first, as before;
* capture keeps the drop from aborting when no such variable exists;
capture drop yr* ;
forvalues i = `SampleFirstYear'(1)`SampleLastYear' {;
	gen yr`i'=(year==`i');
};



/*****************************/
/** Sample Selection **/
/***********************/
* issnm copies pid so the id variable name is consistent with other files;
* NOTE(review): the copy has the default storage type - confirm pid values fit it exactly;
generate issnm = pid ;

** NOTE: Ivan already made the sample selection based on age and gender ;

* Keep only records that carry an id, then one record per id and year;
keep if issnm != . ;
*drop if issnm == 0 ; 
duplicates drop issnm year, force;

* Snapshot of the cleaned file that the sample-building steps reload;
save temp1, replace ; 

/* Build one sample file per earnings measure.  For each measure the loop
   reloads the snapshot saved as temp1, keeps records whose wage-deflated
   value (2004 dollars) is above the floor and not missing, tabulates year,
   declares the panel and saves PSID_<measure>_Sample.dta.
   After the loop the Wages2HD sample is the data left in memory. */

* Earnings floor of 2575 in wage-deflated 2004 dollars;
local MinEarnings 2575;

foreach earnvar in LabIncHD Wages2HD {;

	* Start every pass from the same snapshot so the two samples are independent;
	use temp1, clear ;

	* Missing values compare as larger than any number, so they are excluded explicitly;
	keep if wagedef`earnvar' > `MinEarnings' & !missing(wagedef`earnvar');

	tab year; 

	* Declare as a panel.  xtset records both the id and the time variable;
	xtset issnm year;

	save PSID_`earnvar'_Sample.dta, replace;
};




capture log close ;
