***Download the basic monthly CPS files from Jan 1976 to Oct 2018
***The raw data files are available online at http://www.nber.org/data/cps_basic.html
***Set the directory to the one containing the raw CPS files  
***The monthly files must be extracted and named cpsbYYYYMM.raw
***The following code is used to extract data from raw CPS files
***The code is edited based on the extraction code written in Shimer (2012)

********************************************************************************
* program: extract.do
* date: 11/9/2018
* purpose: extract data from raw CPS files
********************************************************************************

* ---- Session setup ---------------------------------------------------------
* Start from an empty session and close any log left open by an earlier run.
clear all
capture log close

* Run the rest of the file under the Stata 7 interpreter.
version 7.0
set more 1
clear
set mem 50m

* The log is opened before the cd below, so it is written to whatever
* directory Stata was started in.
log using log_extract, replace

* ---- Project paths ---------------------------------------------------------
* base is machine specific; the raw cpsbYYYYMM.raw files are read from, and
* the monthly .dta files saved to, the data folder made current here.
global base "/Volumes/Jin/CPS/unemp_count_Nov2018/"		// set path
cd "${base}/data"
global out "${base}/output"
global data "${base}/data"

**Extract the data from the raw CPS files
* x is the YYYYMM stamp of the month being processed. It is shared by every
* loop in this file: each loop starts where the previous one stopped.
local x = 197601

* 197601 through 197712: the household id is stored in three separate column
* ranges and is assembled into a single number below.
while `x' <= 197712 {
    clear
    infix hh 4-8 hh1 9-12 hh2 25-26 state 17-18 line 94-95 mis 2 ///
        age 97-98 race 100 sex 101 status 109 str dur 66-67 ///
        double fweight 121-132 educ 103-104 grade 105 mar 99 ///
        str ind 88-90 str occu 91-93 ///
        using cpsb`x'.raw

    * Combine the three id pieces, then let the result take over the name hh.
    generate double hh3 = hh*1000000+hh1*100+hh2
    drop hh hh1 hh2
    rename hh3 hh

    * Recode education as educ - grade + 1 and keep it under the name educ.
    generate educ1 = educ-grade+1
    drop educ
    rename educ1 educ

    * Dash-filled string fields are turned into a period.
    * NOTE(review): dur gets a blank followed by a period here, while the
    * loops from 1983 onward use a bare period - confirm this is intended.
    replace dur =" ." if dur =="--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}

* Months from the current value of x up to 198212: the household id is read
* in one piece from columns 4-15 as a double.
while `x' <= 198212 {
    clear
    infix double hh 4-15 state 17-18 line 94-95 mis 2 age 97-98 ///
        race 100 sex 101 status 109 str dur 66-67 ///
        double fweight 121-132 educ 103-104 grade 105 mar 99 ///
        str ind 88-90 str occu 91-93 ///
        using cpsb`x'.raw

    * Recode education as educ - grade + 1 and keep it under the name educ.
    generate educ1 = educ-grade+1
    drop educ
    rename educ1 educ

    * Dash-filled string fields are turned into a period.
    * NOTE(review): dur gets a blank followed by a period here, while the
    * loops from 1983 onward use a bare period - confirm this is intended.
    replace dur =" ." if dur =="--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}

* Months from the current value of x up to 198312: same columns as the
* preceding loop except that ind and occu are read from 524-526 and 527-529.
while `x' <= 198312 {
    clear
    infix double hh 4-15 state 17-18 line 94-95 mis 2 age 97-98 ///
        race 100 sex 101 status 109 str dur 66-67 ///
        double fweight 121-132 educ 103-104 grade 105 mar 99 ///
        str ind 524-526 str occu 527-529 ///
        using cpsb`x'.raw

    * Recode education as educ - grade + 1 and keep it under the name educ.
    generate educ1 = educ-grade+1
    drop educ
    rename educ1 educ

    * Dash-filled string fields are turned into a period.
    replace dur = "." if dur == "--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}

* Months from the current value of x up to 198812: same columns as the
* preceding loop except that line is read from 541-542.
while `x' <= 198812 {
    clear
    infix double hh 4-15 state 17-18 line 541-542 mis 2 age 97-98 ///
        race 100 sex 101 status 109 str dur 66-67 ///
        double fweight 121-132 educ 103-104 grade 105 mar 99 ///
        str ind 524-526 str occu 527-529 ///
        using cpsb`x'.raw

    * Recode education as educ - grade + 1 and keep it under the name educ.
    generate educ1 = educ-grade+1
    drop educ
    rename educ1 educ

    * Dash-filled string fields are turned into a period.
    replace dur = "." if dur == "--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}

* Months from the current value of x up to 199112: every field moves to a
* new column range, and lweight and llind are read in addition.
while `x' <= 199112 {
    clear
    infix double hh 145-156 state 114-115 line 264-265 mis 70 ///
        age 270-271 race 280 sex 275 status 348 str dur 304-305 ///
        double fweight 398-405 double lweight 576-583 llind 584 ///
        educ 277-278 grade 279 mar 272 ///
        str ind 310-312 str occu 313-315 ///
        using cpsb`x'.raw

    * Recode education as educ - grade + 1 and keep it under the name educ.
    generate educ1 = educ-grade+1
    drop educ
    rename educ1 educ

    * Dash-filled string fields are turned into a period.
    replace dur = "." if dur == "--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}

* Months from the current value of x up to 199312: same columns as the
* preceding loop, but grade is no longer read and educ is kept as read.
while `x' <= 199312 {
    clear
    infix double hh 145-156 state 114-115 line 264-265 mis 70 ///
        age 270-271 race 280 sex 275 status 348 str dur 304-305 ///
        double fweight 398-405 double lweight 576-583 llind 584 ///
        educ 277-278 mar 272 ///
        str ind 310-312 str occu 313-315 ///
        using cpsb`x'.raw

    * Dash-filled string fields are turned into a period.
    replace dur = "." if dur == "--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}
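* NOTE(review): this section was flagged with question marks by the author. The column positions in the loop below differ from all earlier loops - confirm them against the CPS record layout.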
* Months from the current value of x up to 199505: the household id is built
* from gestfips, a 12-column hrhhid and the serial suffix hrsersuf.
while `x' <= 199505 {
    clear
    infix gestfips 93-94 double hrhhid 1-12 state 91-92 ///
        str hrsersuf 75-76 line 147-148 mis 63-64 age 122-123 ///
        race 139-140 sex 129-130 status 180-181 str dur 407-409 ///
        double fweight 613-622 double lweight 593-602 llind 69-70 ///
        educ 137-138 mar 125-126 ///
        str ind 436-438 str occu 439-441 ///
        using cpsb`x'.raw

    * Turn the serial suffix into a number z: numeric text is converted by
    * real, the value -1 becomes 0, and letters get the codes assigned below.
    generate z=real(hrsersuf)
    replace z=0 if hrsersuf=="-1"

    * Letters A to X are numbered 1 to 24 in alphabetical order.
    local sfxnum = 1
    foreach sfx in A B C D E F G H I J K L M N O P Q R S T U V W X {
        replace z = `sfxnum' if hrsersuf == "`sfx'"
        local sfxnum = `sfxnum' + 1
    }
    * NOTE(review): Y and Z are not in alphabetical order (Y is 26, Z is 25).
    * Presumably harmless because the codes only need to be distinct, and
    * every loop in this file uses the same mapping - confirm before changing.
    replace z=26 if hrsersuf=="Y"
    replace z=25 if hrsersuf=="Z"

    * Household id: gestfips in the leading digits, then hrhhid, then the
    * two-digit suffix code.
    generate double hh=gestfips*100000000000000+hrhhid*100+z

    * Dash-filled string fields are turned into a period.
    replace dur = "." if dur == "--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    * The id pieces are no longer needed; z stays in the saved file.
    drop hrhhid gestfips hrsersuf

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}


* Months from the current value of x up to 200212: hrhhid is read from
* columns 1-15 and gestfips is no longer part of the household id.
while `x' <= 200212 {
    clear
    infix double hrhhid 1-15 str hrsersuf 75-76 state 91-92 ///
        line 147-148 mis 63-64 age 122-123 race 139-140 ///
        sex 129-130 status 180-181 str dur 407-409 ///
        double fweight 613-622 double lweight 593-602 llind 69-70 ///
        educ 137-138 mar 125-126 ///
        str ind 436-438 str occu 439-441 ///
        using cpsb`x'.raw

    * Turn the serial suffix into a number z: numeric text is converted by
    * real, the value -1 becomes 0, and letters get the codes assigned below.
    generate z=real(hrsersuf)
    replace z=0 if hrsersuf=="-1"

    * Letters A to X are numbered 1 to 24 in alphabetical order.
    local sfxnum = 1
    foreach sfx in A B C D E F G H I J K L M N O P Q R S T U V W X {
        replace z = `sfxnum' if hrsersuf == "`sfx'"
        local sfxnum = `sfxnum' + 1
    }
    * NOTE(review): Y and Z are not in alphabetical order (Y is 26, Z is 25).
    * Presumably harmless because the codes only need to be distinct, and
    * every loop in this file uses the same mapping - confirm before changing.
    replace z=26 if hrsersuf=="Y"
    replace z=25 if hrsersuf=="Z"

    * Household id: hrhhid followed by the two-digit suffix code.
    * NOTE(review): a full 15-digit hrhhid times 100 can exceed 2^53, above
    * which a double cannot hold every integer, so the suffix digits may be
    * rounded away - confirm that ids stay unique for these months.
    generate double hh=hrhhid*100+z

    * Dash-filled string fields are turned into a period.
    replace dur = "." if dur == "--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    * The id pieces are no longer needed; z stays in the saved file.
    drop hrhhid hrsersuf

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}


* Months from the current value of x up to 200501: same as the preceding
* loop except that ind and occu are read from 856-859 and 860-863.
while `x' <= 200501 {
    clear
    infix double hrhhid 1-15 str hrsersuf 75-76 state 91-92 ///
        line 147-148 mis 63-64 age 122-123 race 139-140 ///
        sex 129-130 status 180-181 str dur 407-409 ///
        double fweight 613-622 double lweight 593-602 llind 69-70 ///
        educ 137-138 mar 125-126 ///
        str ind 856-859 str occu 860-863 ///
        using cpsb`x'.raw

    * Turn the serial suffix into a number z: numeric text is converted by
    * real, the value -1 becomes 0, and letters get the codes assigned below.
    generate z=real(hrsersuf)
    replace z=0 if hrsersuf=="-1"

    * Letters A to X are numbered 1 to 24 in alphabetical order.
    local sfxnum = 1
    foreach sfx in A B C D E F G H I J K L M N O P Q R S T U V W X {
        replace z = `sfxnum' if hrsersuf == "`sfx'"
        local sfxnum = `sfxnum' + 1
    }
    * NOTE(review): Y and Z are not in alphabetical order (Y is 26, Z is 25).
    * Presumably harmless because the codes only need to be distinct, and
    * every loop in this file uses the same mapping - confirm before changing.
    replace z=26 if hrsersuf=="Y"
    replace z=25 if hrsersuf=="Z"

    * Household id: hrhhid followed by the two-digit suffix code.
    * NOTE(review): a full 15-digit hrhhid times 100 can exceed 2^53, above
    * which a double cannot hold every integer, so the suffix digits may be
    * rounded away - confirm that ids stay unique for these months.
    generate double hh=hrhhid*100+z

    * Dash-filled string fields are turned into a period.
    replace dur = "." if dur == "--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    * The id pieces are no longer needed; z stays in the saved file.
    drop hrhhid hrsersuf

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}



* Months from the current value of x up to 201810: same as the preceding
* loop except that state is read from columns 93-94.
while `x' <= 201810 {
    clear
    infix double hrhhid 1-15 str hrsersuf 75-76 state 93-94 ///
        line 147-148 mis 63-64 age 122-123 race 139-140 ///
        sex 129-130 status 180-181 str dur 407-409 ///
        double fweight 613-622 double lweight 593-602 llind 69-70 ///
        educ 137-138 mar 125-126 ///
        str ind 856-859 str occu 860-863 ///
        using cpsb`x'.raw

    * Turn the serial suffix into a number z: numeric text is converted by
    * real, the value -1 becomes 0, and letters get the codes assigned below.
    generate z=real(hrsersuf)
    replace z=0 if hrsersuf=="-1"

    * Letters A to X are numbered 1 to 24 in alphabetical order.
    local sfxnum = 1
    foreach sfx in A B C D E F G H I J K L M N O P Q R S T U V W X {
        replace z = `sfxnum' if hrsersuf == "`sfx'"
        local sfxnum = `sfxnum' + 1
    }
    * NOTE(review): Y and Z are not in alphabetical order (Y is 26, Z is 25).
    * Presumably harmless because the codes only need to be distinct, and
    * every loop in this file uses the same mapping - confirm before changing.
    replace z=26 if hrsersuf=="Y"
    replace z=25 if hrsersuf=="Z"

    * Household id: hrhhid followed by the two-digit suffix code.
    * NOTE(review): a full 15-digit hrhhid times 100 can exceed 2^53, above
    * which a double cannot hold every integer, so the suffix digits may be
    * rounded away - confirm that ids stay unique for these months.
    generate double hh=hrhhid*100+z

    * Dash-filled string fields are turned into a period.
    replace dur = "." if dur == "--"
    foreach fld in ind occu {
        replace `fld' = "." if `fld' == "---"
    }

    * The id pieces are no longer needed; z stays in the saved file.
    drop hrhhid hrsersuf

    compress
    save cps`x'.dta, replace

    * Step to the next month; a stamp ending in 13 rolls over to January of
    * the following year (YYYY13 + 88 = next year 01).
    local x = `x' + 1
    if mod(`x', 100) == 13 {
        local x = `x' + 88
    }
}

* All months processed: close the log opened at the top of the file.
log close
