* This code generates the dataset for the scatterplots from scratch
* Documents needed: 1. The full gss-dataset straight from the homepage
* 		    2. A csv file called urates containing the unemployment rate for overall and prime age
* The output it produces can be read into scatterplots.do which then creates the plots
* The first part applies the sample selection criteria and keeps the variables needed
* The second part creates the series. The construction of joblose is documented in its own loop; the construction of jobfind (which comes first in this file) is equivalent
* I then merge and insheet a csv file with the relevant unemployment rates to complete the dataset.


* Session setup: turn off output paging, clear everything in memory, then set the memory and variable limits
set more off
clear all 
* NOTE(review): set mem is presumably only needed on older Stata releases - confirm before removing
set mem 10000000
* NOTE(review): maxvar is raised after clear all and before the GSS file is loaded - presumably because that file is very wide, confirm
set maxvar 30000

** JOBFIND series: for each sample, the weighted share of valid respondents answering jobfind==3 (not easy), by year
** The outer loop is for the two different samples: all (ages 18-64) and pa (prime age, ages 25-54)
** Output: jobfindall.dta and jobfindpa.dta, one row per year with variable jobfindall or jobfindpa
local sample all pa
foreach sam of local sample {
	** load main dataset
	use gss7210_r1_1.dta, clear

	keep id year wrkstat occ wrkslf wrkgovt occ80 age joblose jobfind lastslf wtssall sample
	** SAMPLE SELECTION CRITERIA
	** Retain people who are 18-64 years old; drop the young and old
	drop if age < 18
	drop if age > 64
	** Retain employed persons; drop those who are not working
	drop if wrkstat > 3
	** Retain employees; drop the self-employed
	drop if lastslf == 1
	drop if wrkslf == 1
	** Drop active-duty service members
	drop if occ == 590
	drop if occ80 == 431
	** Drop the black oversamples in the 1982 and 1987 surveys
	drop if sample == 4 | sample == 5 | sample == 7

	** FOR THE PRIME AGE SAMPLE: narrow further to ages 25-54
	if "`sam'" == "pa" {
		drop if age < 25
		drop if age > 54
	}

	keep year age job* wtssall

	** yes will hold the 0/1 indicator and then the weighted yearly share
	gen yes = .
	save "temp`sam'.dta", replace

	** first stays 1 until the first year with valid responses has been written.
	** A macro flag is used instead of testing the year variable: the programming if
	** on a variable only looks at the first observation, and it sends the code to the
	** append branch with no ratios file when the starting year has no valid responses.
	local first 1
	forval y = 1977/2010 {
		use "temp`sam'.dta", clear
		** keep the respondents of this year who gave one of the three valid answers
		keep if (year == `y' & inlist(jobfind, 1, 2, 3))
		** years without any valid response contribute nothing and are skipped
		if _N > 0 {
			** indicator for the answer coded 3 (not easy)
			replace yes = (jobfind == 3)
			** weighted share for this year, stored on every retained row
			sum yes [aw=wtssall]
			local f = r(mean)
			replace yes = `f'
			if `first' {
				save "ratios`sam'.dta", replace
				local first 0
			}
			else {
				append using "ratios`sam'.dta"
				save "ratios`sam'.dta", replace
			}
		}
	}

	** stop instead of silently reading a missing or stale ratios file
	if `first' {
		display as error "no valid jobfind responses found for sample `sam'"
		exit 2000
	}

	use ratios`sam'.dta, clear

	** yes is constant within year, so the mean just reduces the data to one row per year
	collapse (mean) yes, by(year)
	drop if yes == .
	rename yes jobfind`sam'
	save jobfind`sam'.dta, replace
}

***********************JOBLOSE*************************************************************
** JOBLOSE series: for each sample, the weighted share of valid respondents answering joblose 1 or 2, by year
** NOTE(review): codes 1 and 2 are presumably the two likely answers - confirm against the GSS codebook
** Output: jobloseall.dta and joblosepa.dta, one row per year with variable jobloseall or joblosepa

local sample all pa
foreach sam of local sample {
	use gss7210_r1_1.dta, clear

	keep id year wrkstat occ wrkslf wrkgovt occ80 age joblose jobfind lastslf wtss* sample
	** Retain people who are 18-64 years old; drop the young and old
	drop if age < 18
	drop if age > 64
	** Retain employed persons; drop those who are not working
	drop if wrkstat > 3
	** Retain employees; drop the self-employed
	drop if lastslf == 1
	drop if wrkslf == 1
	** Drop active-duty service members
	drop if occ == 590
	drop if occ80 == 431
	** Drop the black oversamples in the 1982 and 1987 surveys
	drop if sample == 4 | sample == 5 | sample == 7

	** Prime age sample: narrow further to ages 25-54
	if "`sam'" == "pa" {
		drop if age < 25
		drop if age > 54
	}
	keep year age job* wtssall

	** yes will hold the 0/1 indicator and then the weighted yearly share
	gen yes = .
	save "temp`sam'.dta", replace

	** first stays 1 until the first year with valid responses has been written.
	** A macro flag is used instead of testing the year variable: the programming if
	** on a variable only looks at the first observation, and it sends the code to the
	** append branch with no ratios file when the starting year has no valid responses.
	local first 1
	forval y = 1977/2010 {
		use "temp`sam'.dta", clear
		** keep the respondents of this year who gave one of the four possible answers
		keep if (year == `y' & inlist(joblose, 1, 2, 3, 4))
		** years without any valid response contribute nothing and are skipped
		if _N > 0 {
			** indicator for answers coded 1 or 2
			replace yes = inlist(joblose, 1, 2)
			** weighted share for this year, stored on every retained row
			sum yes [aw=wtssall]
			local f = r(mean)
			replace yes = `f'
			if `first' {
				save "ratios`sam'.dta", replace
				local first 0
			}
			else {
				append using "ratios`sam'.dta"
				save "ratios`sam'.dta", replace
			}
		}
	}

	** stop instead of silently reading a missing or stale ratios file
	if `first' {
		display as error "no valid joblose responses found for sample `sam'"
		exit 2000
	}

	use ratios`sam'.dta, clear

	** yes is constant within year, so the mean just reduces the data to one row per year
	collapse (mean) yes, by(year)
	drop if yes == .
	list year yes
	rename yes joblose`sam'
	save joblose`sam'.dta, replace
}

* Combine the four yearly series into one dataset keyed on year.
* Start from jobfindall and attach the remaining three files one at a time;
* nogenerate suppresses the _merge variable so nothing has to be dropped afterwards.
clear all
use jobfindall.dta
foreach series in jobfindpa jobloseall joblosepa {
	merge 1:1 year using `series'.dta, nogenerate
}
save jobseries.dta, replace

* Read the unemployment rates, give the columns the names used downstream,
* attach the job series, rescale the shares to percent, and write the final dataset.
clear all
insheet using urates.csv, comma

* ur_all_* becomes curcont*, ur_pa_* becomes paurcont*; the jan_may columns get no suffix
foreach window in oct_sep dec_nov {
	rename ur_all_`window' curcont_`window'
	rename ur_pa_`window' paurcont_`window'
}
rename ur_all_jan_may curcont
rename ur_pa_jan_may paurcont


* keep only the years present in both files; no _merge variable is left behind
merge 1:1 year using jobseries.dta, keep(match) nogenerate

* express the shares in percent, creating joblose, joblose_pa, jobfind, jobfind_pa in that order
foreach series in joblose jobfind {
	gen `series' = 100*`series'all
	gen `series'_pa = 100*`series'pa
}
drop jobloseall joblosepa jobfindall jobfindpa 

save data_complete.dta, replace
outsheet using datacomplete.csv, comma replace

* cleanup: remove every intermediate dataset written by this do-file
foreach stem in jobfindall jobfindpa jobloseall joblosepa temppa tempall ratiosall ratiospa jobseries {
	erase `stem'.dta
}






