/* Start from an empty session and turn off output paging so the script runs unattended. */
clear all 
set more off
/* Run the Paths do-file; presumably it defines the $BasePath and $DataPath globals referenced later in this file -- TODO confirm. */
do Paths


/*****************************************************************************************************
This do-file merges and appends the Form 5500 pension report data from the DOL. Basic plan data are in F_5500; F_SCH_H contains the asset breakdown. See http://www.dol.gov/ebsa/5500main.html for the instructions that describe the variables.

*****************************************************************************************************/
/* Close any log that is still open (the error raised when none is open is ignored), then start a fresh log, overwriting the previous one. */
capture log close
log using "$BasePath/logs/5500-merge.log", replace

/*****************************************************************************************************
1. Merge data and Append Data F_5500 F_SCH_H  F_SCH_MB F_SCH_SB F_SCH_B
*****************************************************************************************************/
/* For every filing year: unzip the DOL extracts, merge the schedules onto the
   main F_5500 file, apply year-specific clean-ups, convert date strings to
   Stata dates, and append the result to the running stacked file `F'. */
clear all
tempfile F
tempfile temp
local first ""
forvalues yyyy = 2004/2012{
	/* Forms to process this year: the main form and Schedule H always, plus
	   the actuarial schedules (MB/SB after 2008, B before 2008). */
	local formlist F_5500 F_SCH_H
	if `yyyy'>2008 {
		local formlist `formlist' F_SCH_MB F_SCH_SB
	}
	else if `yyyy'<2008 {
		local formlist `formlist' F_SCH_B
	}
	/* NOTE(review): no actuarial schedule is merged for 2008 (neither branch
	   above fires) -- confirm this is intentional. */

	/* The filing identifier used as merge key changes name in 2009. */
	if `yyyy'<2009 {
		local mergevar filing_id
	}
	else {
		local mergevar ack_id
	}

	/* Extract this year's .dta files into the working directory. */
	foreach form of local formlist {
		qui unzipfile `"$DataPath/5500s/`form'_`yyyy'"', replace
	}
	use F_5500_`yyyy', clear

	/* Schedules to merge onto F_5500: everything in the form list except the
	   main form itself and the Schedule D parts. Token-based list subtraction
	   avoids accidental substring matches. */
	local skipforms F_5500 F_SCH_D_Part1 F_SCH_D_Part2
	local formsmerge : list formlist - skipforms
	foreach form of local formsmerge {
		di `"`form'"'
		/* The master must be unique on the key for a 1:m merge. */
		duplicates drop `mergevar', force
		merge 1:m `mergevar' using `form'_`yyyy',  update
		if "`form'"=="F_SCH_H" {
			/* Keep only filings that have a Schedule H. With the update option
			   matched rows are coded 3, 4 (missing updated) or 5 (conflict),
			   so test >=3 rather than ==3 to avoid dropping matched filings. */
			keep if _merge>=3
		}
		else if inlist("`form'","F_SCH_B","F_SCH_MB") {
			gen flag_employer_multi=1 if _merge>=3
		}
		else if "`form'"=="F_SCH_SB" {
			gen flag_employer_single=1 if _merge>=3
			/* Half of the observations are in the using F_SCH_SB but not in the master in 2009-2012. Might come from SF. In any case the match is terrible while it is ok for F_SCH_B. */
			/* NOTE(review): the original remark ended "Better to use them", yet using-only rows are dropped just below -- confirm which is intended. */
		}
		/* Using-only rows have no F_5500 record; discard them. */
		drop if _merge==2
		drop _merge
	}

	/* Fixes to individual years for conformity */
	if `yyyy'==2007 {
		qui replace admin_zip_code = "" if regexm(admin_zip_code,`"SAME"')
	}
	if `yyyy'==2008 {
		foreach party in spons_dfe admin preparer {
			qui replace `party'_zip_code = `""' if regexm(`party'_zip_code,`"[A-Z]"')
			qui replace `party'_phone_num = `"6176645495"' if `party'_phone_num==`"617664549S"'
		}
		qui replace party_in_int_not_rptd_ind = "" if party_in_int_not_rptd_ind=="E"
	}
	if `yyyy'>=2009 {
		/* Keep only the first 10 characters of the signature timestamps. */
		foreach party in spons admin {
			qui replace `party'_signed_date = substr(`party'_signed_date,1,10)
		}
	}

	/* Convert every non-date variable to numeric where possible, ignoring
	   common separator characters. */
	qui ds *date*, not
	qui destring `r(varlist)', replace ignore(`" "' `"~"' `"/"' `"."' `","' `"-"' `"^"') 

	/* Turn date variables (and form_tax_prd) into Stata daily dates. Variables
	   that are numeric at this point are first converted back to strings so
	   that date() can parse them. */
	foreach date of varlist *date*  form_tax_prd{
		capture confirm numeric variable `date'
		if !_rc{
			qui tostring `date', force replace
		}
		rename `date' `date'_s
		qui gen `date'=date(`date'_s,`"YMD"')
		qui format `date' %td
		drop `date'_s
	}

	qui gen year_dataset=`yyyy'
	compress

	/* The first year initializes the stacked file; later years are appended to it. */
	if "`first'"==""{
		qui save `F', replace
		local first no
	}
	else{
		/* Append with force so that type mismatches across years do not stop the run */
		save `temp', replace
		use `F', clear
		append using `temp', force
		qui save `F', replace
	}

	/* Remove the unzipped extracts for this year. */
	foreach form of local formlist{
		qui erase `form'_`yyyy'.dta
	}
}


/* Write the stacked dataset to the working directory, store a zipped copy under $DataPath, and close the log. */
quietly save 5500-merged, replace
zipfile 5500-merged.dta, saving("$DataPath/5500-merged", replace)
/* The unzipped copy is currently left in place; uncomment the next line to delete it. */
*qui erase 5500-merged.dta

log close




