// ---------------------------------------------------------------
//  Session setup: empty memory and turn off output paging
// ---------------------------------------------------------------
clear all
set more off

// Directory globals referenced by the rest of the script
global spath   "path/stata"
global rpath   "path/results"
global sspath  "path/stata_small"
global logpath "path/log_files"
global blspath "path/BLS_data"

// File-name toggles. Only the value assigned here is used; the alternative
// setting for each toggle is shown in the trailing comment.
local outtype ""          // alternative: "_noout"
local ent     "_noent"    // alternative: ""

// Suffix appended to the input and output file names below
global stub "`ent'`outtype'_susb_bcont"

// **********
//  WEIGHTING
// **********

// *****************************************************************************************
//  Part 1: Take the files that have been collapsed to grid_date-indcodenum-size_code cells
//  	    Create weights for each variable separately.  Save file with weights, and further
// 	    collapse to grid_date to create aggregate series
// *****************************************************************************************

// For each weighting scheme and employment concept, build cell-level weights from the
// 15feb2020 cross-section of the ADP cell file and the SUSB employment file, and save
// them to weightfilex_<emptype><stub>.
local wgtlist="wgt6"
local emplist="tot_act tot_emp"

if 1 {
	foreach wgttype in `wgtlist' {
		foreach emptype in `emplist' {
			disp("`wgttype'")
			disp("`emptype'")

			use $spath/all_and_cont_panel_cellsx$stub, clear

			// Weights are built from the base-date cross-section only
			keep if grid_date==td(15feb2020)
			capture drop wvar
			capture drop _merge

			// Dummy weight: 1 where `emptype' is non-missing, 0 otherwise
			gen wvar_`emptype' = `emptype' ~= .

			***********************************
			// If weighting, do a bunch of SUSB work.  If not, skip straight to the final collapse
			if ~strmatch("`wgttype'","unwgt") {

				//*******************
				// Start core weighting code: Weighting so that l_`emptype' matches BLS employment
				// Required variables: grid_date l_`emptype' indcodenum size_code
				//	d_`emptype' is included, and outputted, but not used or changed
				// Outputs: grid_date l_`emptype' indcodenum size_code d_`emptype' wvar_`emptype'
				gen year=year(grid_date)
				keep year grid_date vin_date `emptype' indcodenum size_code

				// **************************************************************************
				// MISSING WITHIN SECTOR CODE
				// Classify indcodenums as sectors if relevant
				//  sector_nomiss flags indcodenums within sector for which we know 2digitnaics
				gen sector_nomiss     = indcodenum
				replace sector_nomiss = 3133 if indcodenum >= 31 & indcodenum <= 33
				replace sector_nomiss = 4445 if indcodenum >= 44 & indcodenum <= 45
				replace sector_nomiss = 4849 if indcodenum >= 48 & indcodenum <= 49
				//  sector_all flags all indcodenum within sector
				gen sector_all     = indcodenum
				replace sector_all = 3033 if indcodenum >= 30 & indcodenum <= 33
				replace sector_all = 4345 if indcodenum >= 43 & indcodenum <= 45
				replace sector_all = 4749 if indcodenum >= 47 & indcodenum <= 49

				egen double sector_nomiss_emp = sum(`emptype'), by(grid_date vin_date size_code sector_nomiss)
				egen double sector_all_emp    = sum(`emptype'), by(grid_date vin_date size_code sector_all)

				// By sector-size, the fraction of sector emp that is not missing trailing digits
				gen double pct_sector_nomiss_emp = sector_nomiss_emp / sector_all_emp
				// **************************************************************************

				save $spath/temp_w$stub, replace

				// Generate a file with all the grid_dates and vin_dates
				keep grid_date vin_date
				duplicates drop grid_date vin_date, force
				gen year=year(grid_date)
				save $spath/temp_grid$stub, replace

				// Collapse SUSB data to year-cell level
				use $blspath/susb_emp, clear

				collapse (sum) q1_avg_emplvl , by(indcodenum size_code)
				// set year = 2020 to merge weights onto ADP cells
				gen year = 2020

				// Cartesian product: each grid_date gets a full set of BLS cells
				joinby year using $spath/temp_grid$stub // each grid_date-vin_date now has a full set of QCEW cells

				// Merge the ADP employment by grid_date-cell onto it
				display("Merging on size_code indcodenum grid_date vin_date")
				merge 1:1 size_code indcodenum grid_date vin_date using $spath/temp_w$stub, nogen

				//**************************************************************
				//SHRINK SECTORS ACCORDING TO THEIR AMOUNT OF MISSING 2 digit NAICS

				// Total SUSB employment in sector-size class (collapsing within sectors)
				egen double qcew_sector_size_emp = sum(q1_avg_emplvl), by(grid_date vin_date sector_all size_code)

				// SUSB industry-size employment times ADP emp share with specific industry info in the sector-size cell
				gen double qn_emplvl = pct_sector_nomiss_emp*q1_avg_emplvl

				// For the ADP cells that only have sector-level industry (indcodenum=30, etc.)
				// fill in with something
				replace qn_emplvl    = pct_sector_nomiss_emp*qcew_sector_size_emp if q1_avg_emplvl == .
				//**************************************************************

				// NOTE(review): rows that exist only in the SUSB file carry a missing
				// pct_sector_nomiss_emp (it comes from temp_w), so their B_ns is missing and
				// they do not contribute to B_s below -- confirm this is the intended
				// treatment of ADP-empty cells.
				rename qn_emplvl  B_ns // Bls cell employment
				rename `emptype'  A_ns // Adp cell employment
				replace A_ns = 0 if A_ns==.
				// Size-class totals are stored as double, consistent with the sector sums
				// above; at the default float type large employment totals lose precision.
				bysort grid_date vin_date size_code: egen double B_s=sum(B_ns) //  size class emp for this date

				display("here 1")
				// Adjustment for empty cells: some ADP cells are empty, so we have to drop.  Then we inflate the rest of the
				// weights to bring in the SUSB employment for the ADP-missing cells.
				// drop empty cells
				drop if A_ns==0
				// SUSB employment in each size class after dropping ADP empty cells
				bysort grid_date vin_date size_code: egen double B_s_star=sum(B_ns)
				// Adjustment factor to account for missing cells
				gen double adj = B_s/B_s_star
				* A_s is total ADP employment in the size class, excluding those with missing NAICS
				bysort grid_date vin_date size_code: egen double A_s=sum(A_ns*(indcodenum~=.))
				* Copy the missing NAICS employment to the other rows.  A_s+A_m is total emp. in the size class
				bysort grid_date vin_date size_code: egen double A_ms=sum(A_ns*(indcodenum==.))

				* Weight: NAICS within size class
				gen double weight_ns=.
				replace weight_ns=B_ns/(A_ns+(A_ns/A_s)*A_ms) if indcodenum~=. // Weight of guys with NAICS data
				replace weight_ns=B_s_star/(A_s+A_ms) if indcodenum==. // Weight of guys with missing NAICS

				gen double wvar_true = weight_ns*adj // Final weight, product of the two

				* Save the weights, and merge into the microdata
				keep wvar_true grid_date vin_date size_code indcodenum

				display("Merging on grid_date vin_date size_code indcodenum")
				merge 1:m grid_date vin_date size_code indcodenum using $spath/temp_w$stub, nogen
				replace wvar_true=0 if wvar_true==. // NAICS 92 and 99, dropped above
				capture drop wvar_`emptype'
				gen double wvar_`emptype' = wvar_true // Overwrite the dummy weights from before
				drop wvar_true

				keep grid_date vin_date `emptype' wvar_`emptype' indcodenum size_code

				save ${spath}/weightfilex_`emptype'${stub}, replace

				// End core weighting code
				//*******************
				//*******************
			}

		}

	}
}

// For each aggregation level (agg / bysize / byind) and weighting scheme (wgt6 / unwgt):
// attach the cell weights, collapse the cell panel, normalise the series to the
// 15feb2020 total, and export a combined weighted + unweighted workbook.
local wv="wgt6 unwgt"
local szlist="agg bysize byind"
local elist="emp act"

foreach sz of local szlist {
	foreach w of local wv {
		foreach e of local elist {

			display("e: `e'")

			use $spath/all_and_cont_panel_cellsx$stub, clear
			// Attach the weight for this employment concept to every row of the cell panel
			merge m:1 indcodenum size_code using $spath/weightfilex_tot_`e'$stub

			// Weight used in the collapse: the saved weight, or 1 for the unweighted run.
			// Stored as double so a double-precision weight is not rounded to float.
			if "`w'"=="wgt6" {
				gen double w=wvar_tot_`e'
			}
			if "`w'"=="unwgt" {
				gen double w=1
			}

			// Overwrite the cell identifiers so the collapse aggregates to the requested level
			if "`sz'"=="agg" {
				replace size_code=0
				replace indcodenum=0
			}

			if "`sz'"=="bysize" {

				replace indcodenum=0

				// Replace SUSB size code with appropriate size code for 1-49 | 50-499 | 500+ disclosure
				gen new_size=.
				replace new_size=1 if 1<=size_code & size_code<=2
				replace new_size=2 if 3<=size_code & size_code<=5
				replace new_size=3 if 6==size_code
				replace size_code=new_size
				drop new_size
			}


			if "`sz'"=="byind" {
				replace size_code=0
			}

			collapse (sum) death_`e' ever_dead_`e' birth_`e' cont_`e' lcont_`e' bcont_`e' bcontbase_`e'  sbcont_`e' sbcontbase_`e'  tot_`e' ///
				deadbase_`e' contbase_`e' reenterbase_`e' reenter_`e' enter_`e' [pw=w], by(grid_date vin_date size_code indcodenum)

			gen net_bd_`e'=birth_`e'-death_`e'

			// Normalisation factor: the cell's total on 15feb2020, copied to all of its rows
			capture: drop factor factor_temp
			gen factor_temp=tot_`e'
			replace factor_temp=0 if  grid_date~=td(15feb2020)
			bysort size_code indcodenum: egen factor=sum(factor_temp)

			gen norm_tot_`e'_`w'=100*tot_`e'/factor
			gen norm_death_`e'_`w'=100*death_`e'/factor
			gen norm_birth_`e'_`w'=100*birth_`e'/factor
			gen norm_ever_dead_`e'_`w'=100*ever_dead_`e'/factor
			gen norm_net_bd_`e'_`w'=100*net_bd_`e'/factor
			gen norm_bcont_`e'_`w'=100*bcont_`e'/factor

			// Running sum of net births within each size_code-indcodenum cell, in date order.
			// An ungrouped running sum here would accumulate across cells, and the egen
			// above leaves the rows sorted by cell rather than by grid_date.
			// (cum_* is not in the keep list further down, so it never reaches the exports.)
			bysort size_code indcodenum (grid_date vin_date): gen cum_norm_net_bd_`e'_`w'=sum(norm_net_bd_`e'_`w')
			replace cum_norm_net_bd_`e'_`w'=cum_norm_net_bd_`e'_`w'+100

			gen norm_deadbase_`e'_`w'=100*deadbase_`e'/factor
			gen norm_contbase_`e'_`w'=100*contbase_`e'/factor
			gen norm_reenterbase_`e'_`w'=100*reenterbase_`e'/factor

			gen contrib_reenter_`e'_`w'=100*(reenter_`e'-reenterbase_`e')/factor // Main part
			gen contrib_bcont_`e'_`w'=100*(bcont_`e'-bcontbase_`e')/factor
			gen contrib_sbcont_`e'_`w'=100*(sbcont_`e'-sbcontbase_`e')/factor // Main part
			gen contrib_dead_`e'_`w' = -norm_deadbase_`e'_`w' // Main part
			gen contrib_birth_`e'_`w'=100*(enter_`e')/factor // Main part

			gen contrib_dead2_`e'_`w'=contrib_dead_`e'_`w' - 100*reenterbase_`e'/factor
			gen contrib_reenter2_`e'_`w'=100*(reenter_`e')/factor

			gen norm_bcontonly_`e'_`w'=100*bcont_`e'/bcontbase_`e'

			gen norm_sbcontonly_`e'_`w'=100*sbcont_`e'/sbcontbase_`e'

			gen death_rate_`e'_`w'=100*death_`e'/tot_`e'
			gen birth_rate_`e'_`w'=100*birth_`e'/tot_`e'

			save ${spath}/temp_collapse_`e'$stub, replace

		}

		// Combine the emp and act results for this weighting scheme
		use ${spath}/temp_collapse_emp$stub, clear
		merge 1:1 grid_date vin_date size_code indcodenum using ${spath}/temp_collapse_act$stub


		keep contrib* norm_tot_* norm_deadbase* norm_contbase* norm_reenterbase* norm_bcontonly* norm_sbcontonly*  ///
			death_rate* birth_rate* ///
			size_code indcodenum grid_date

		save ${spath}/temp_`sz'_w_`w'$stub, replace
	}


	// Export weighted and unweighted stuff
	use ${spath}/temp_`sz'_w_wgt6$stub, clear

	// NOTE(review): _merge from this merge is kept and therefore exported to the
	// workbook below -- confirm that column is wanted.
	merge 1:1 grid_date size_code indcodenum using ${spath}/temp_`sz'_w_unwgt$stub

	sort grid_date size_code indcodenum

	// Drop the identifier(s) that were set to a constant for this aggregation level
	if "`sz'"=="agg" {
		drop size_code indcodenum
	}
	if "`sz'"=="bysize" {
		drop indcodenum
	}

	if "`sz'"=="byind" {
		drop size_code
	}

	export excel using $rpath/all_and_cont_panel_`sz'${stub}.xlsx, replace firstrow(variables)
}



// Plot aggregates: weighted and unweighted series side by side

use ${spath}/temp_agg_w_wgt6$stub, clear
merge 1:1 grid_date size_code using ${spath}/temp_agg_w_unwgt$stub

// One graph per weighting scheme; the last observation is left out of each plot
local ttl_wgt6  "Weighted, allmargins and continuers"
local ttl_unwgt "unweighted, allmargins and continuers"
foreach wt in wgt6 unwgt {
	twoway line norm_bcontonly_emp_`wt' norm_bcontonly_act_`wt' norm_tot_emp_`wt' norm_tot_act_`wt'  grid_date if _n<_N, ///
		name("baseline_`wt'",replace) title("`ttl_`wt''")
}


// Contribution check for one weighting scheme (switch to "wgt6" for the weighted version)
local ww="unwgt"

use ${spath}/temp_agg_w_`ww'${stub}, clear

// Margins shared by both contribution sums
local margins contrib_birth_act_`ww' + contrib_dead_act_`ww' + contrib_reenter_act_`ww'

gen sum_s_contrib = contrib_sbcont_act_`ww' + `margins'
gen sum_contrib   = contrib_bcont_act_`ww' + `margins'

// Normalised total expressed as a deviation from 100
gen nt= norm_tot_act_`ww'-100

// Bring the comparison columns to the front and open them in the data browser
local showvars grid_date nt sum_contrib contrib_sbcont_act_`ww' contrib_birth_act_`ww' contrib_dead_act_`ww' contrib_reenter_act_`ww'
order `showvars'
browse `showvars'

twoway line nt sum_s_contrib grid_date if _n<_N















/*
	twoway line death_rate_emp_unwgt birth_rate_emp_unwgt death_rate_emp_wgt6 birth_rate_emp_wgt6  grid_date if _n<_N, ///
		name("bd",replace) title("Birth and death rates")


	twoway line  norm_deadbase_emp_wgt6 norm_contbase_emp_wgt6 norm_reenterbase_emp_wgt6  grid_date if _n<_N, ///
		name("shares",replace) title("Weighted shares of Feb 15 employment")



	twoway line  contrib_bcont_emp_wgt6 contrib_birth_emp_wgt6 contrib_dead_emp_wgt6 contrib_reenter_emp_wgt6  grid_date if _n<_N, ///
		name("contrib",replace) title("Weighted contribs")
	twoway line  contrib_bcont_emp_wgt6 contrib_birth_emp_wgt6 contrib_dead2_emp_wgt6 contrib_reenter2_emp_wgt6  grid_date if _n<_N, ///
		name("contrib2",replace) title("Weighted contribs, v2")
*/


// By size code: normalised total (emp) for each of the three size classes

use ${spath}/temp_bysize_w_wgt6$stub, clear
merge 1:1 grid_date size_code using ${spath}/temp_bysize_w_unwgt$stub

// The unweighted graph gets its own name here: "baseline_unwgt" is already used by the
// aggregate plot earlier in the script, and name(..., replace) would discard that graph.
twoway (line norm_tot_emp_unwgt grid_date if size_code==1) ///
			(line norm_tot_emp_unwgt grid_date if size_code==2) ///
			(line norm_tot_emp_unwgt grid_date if size_code==3) ///
		, ///
		name("baseline_unwgt_bysize",replace) title("unweighted, allmargins and continuers")


twoway (line norm_tot_emp_wgt6 grid_date if size_code==1) ///
			(line norm_tot_emp_wgt6 grid_date if size_code==2) ///
			(line norm_tot_emp_wgt6 grid_date if size_code==3) ///
		, ///
		name("baseline_wgt",replace) title("weighted, allmargins and continuers")


// ****************************************
// Make figure-specific files
// ****************************************

// Figure 2: weighted aggregate series for continuers (bcont, sbcont) and totals
use ${spath}/temp_agg_w_wgt6$stub, clear
merge 1:1 grid_date size_code using ${spath}/temp_agg_w_unwgt$stub

// Assemble the list of weighted series to keep: each stem for emp and act
local fig2vars
foreach stem in norm_bcontonly norm_sbcontonly norm_tot {
	foreach e in emp act {
		local fig2vars `fig2vars' `stem'_`e'_wgt6
	}
}
keep grid_date `fig2vars'

order grid_date
export excel using $rpath/fig2${stub}.xlsx, replace firstrow(variables)

// Figure 3 and 8: weighted by-size series, reshaped to one column per size class
use ${spath}/temp_bysize_w_wgt6$stub, clear
local series norm_tot_emp_wgt6 norm_tot_act_wgt6 norm_deadbase_emp_wgt6 norm_reenterbase_emp_wgt6
keep grid_date size_code `series'
reshape wide `series', i(grid_date) j(size_code)

// Figure 3 uses only the normalised totals; the full wide data is restored afterwards
preserve
	keep grid_date norm_tot_emp_wgt6* norm_tot_act_wgt6*
	sort grid_date

	export excel using $rpath/fig3${stub}.xlsx, replace firstrow(variables)

	// One graph per employment concept, all size classes on the same axes
	local lbl_emp "paid"
	local lbl_act "active"
	foreach m in emp act {
		twoway (line norm_tot_`m'_wgt6* grid_date if _n<_N), ///
			name("size_`m'_wgt",replace) title("weighted `lbl_`m'', by size")
	}
restore

// Add exit and reentry margins for aggregate
merge 1:1 grid_date using ${spath}/temp_agg_w_wgt6$stub
// keep only exit and reentry margins, for both aggregate and by size class
keep grid_date norm_deadbase_emp_wgt6* norm_reenterbase_emp_wgt6*
sort grid_date

export excel using $rpath/fig8${stub}.xlsx, replace firstrow(variables)


// Exit and reentry: aggregate first, then each size class (wide-variable suffix 1-3)
twoway (line norm_deadbase_emp_wgt6 norm_reenterbase_emp_wgt6 grid_date if _n<_N), ///
	name("size_emp_xre_wgt",replace) title("weighted, exit and reentry, agg")
foreach s in 1 2 3 {
	twoway (line norm_deadbase_emp_wgt6`s' norm_reenterbase_emp_wgt6`s' grid_date if _n<_N), ///
		name("size_emp_xre`s'_wgt",replace) title("weighted, exit and reentry, size `s'")
}


// Table 1: by-industry growth of the weighted normalised total (emp) between
// three reference dates
use ${spath}/temp_byind_w_wgt6$stub, clear

// Label the three reference dates; every other date gets an empty label
gen period = cond(grid_date==td(15feb2020), "base", ///
	cond(grid_date==td(25apr2020), "trough", ///
	cond(grid_date==td(20jun2020), "end", "")))

keep if inlist(period, "base", "trough", "end")
keep period indcodenum norm_tot_emp_wgt6
rename norm_tot_emp_wgt6 emp

// One row per industry with columns empbase, emptrough, empend
reshape wide emp, i(indcodenum) j(period) string

// Percent change from the base date to each later reference date
foreach p in trough end {
	gen base_to_`p'_growth=100*(emp`p'-empbase)/empbase
}

keep indcodenum *growth
export excel using $rpath/tab1${stub}.xlsx, replace firstrow(variables)


