/****************************************************************************
*The following .do file estimates the likelihood that a firm shuts down and/or
reopens as a function of firm characteristics, namely size and growth rate.



****************************************************************************/

* Close any leftover copy of this named log, then open a fresh date-stamped log
capture log close firmlevel_prob_log
local date: display %tdCCYY-NN-DD =daily("`c(current_date)'", "DMY")
log using "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/logs/04_firmlevel_prob_`date'.log", ///
	replace name(firmlevel_prob_log)


* Session settings: no paging of output, no variable-name abbreviation
set more off
set varabbrev off

* Directory layout of the replication archive (locals used throughout the file)
local dir		"/accounts/projects/jrothst/homebase/data/bpea_replication_archive/"
local code		"`dir'/code/"
local results		"`dir'/results/"
local sourcedata	"`dir'/data_raw/"
local workingdata	"`dir'/data_clean/"


********************************************************************************
*Open and clean 2018-2019 file 
********************************************************************************

*Build the January 2019 firm-level totals used in the growth-rate analysis.
*Only the variables needed below are read from the pre-2020 file.
use	sthud ind company_id user_id event_date location_id	///
	msachud stfipshud hours_worked				///
	using							///
	"/accounts/projects/jrothst/homebase/data/Data_clean/homebase_raw_pre2020.dta", clear

 compress

*Switch to the shorter variable names used in the rest of this file
 rename (sthud stfipshud msachud ind      company_id user_id)	///
        (state stfips    msac    industry firm       person)
 egen establishment = group(firm location_id)

*Discard records with no state or no MSA code
 drop if state==. | msac==.

 gsort firm location_id industry msac state person event_date

*Keep records with plausible hours (0-20) and drop state FIPS codes 98 and 99
 keep if inrange(hours_worked,0,20) & !inlist(stfips,98,99)

*Restrict to January 2019
 keep if inrange(event_date,date("01012019","MDY"),date("01312019","MDY"))

*Step 1: total January 2019 hours for each person within firm/industry/msa/state
 gcollapse (sum) hours_worked, by(firm industry msac state person)

*Flag people with strictly positive hours so they can be counted as workers
 gen anywork = hours_worked>0

*Step 2: firm-level totals of hours and of workers with positive hours
 gcollapse	(sum)	total_hours_jan19=hours_worked	///
			num_workers_jan19=anywork	///
		, by(firm industry msac state)

*The result must have one row per firm/industry/msac/state combination
 isid firm industry msac state

tempfile jan2019
save `jan2019'


********************************************************************************
*Open and clean 2020 file
********************************************************************************

use `workingdata'/homebase_raw_2020_update, clear


*Switch to the shorter variable names used in the rest of this file
 rename (sthud stfipshud msachud ind      company_id user_id)	///
        (state stfips    msac    industry firm       person)
 egen establishment = group(firm location_id)

*Discard records with no state or no MSA code
 drop if state==. | msac==.

 gsort firm establishment person event_date

*Retain only the variables used below
 keep firm industry msac state stfips week establishment person day hours_worked numdaysinweek


*Find the last week that has a full seven days of data
 summarize week if numdaysinweek==7
 local maxweek `r(max)'

*Analysis window: week 4 through the last complete week
 keep if inrange(week,4,`maxweek')


*Keep records with plausible hours (0-20) and state FIPS codes between 1 and 56
 keep if inrange(hours_worked,0,20)
 keep if inrange(stfips,1,56)


*Create new firm definition, matching what Feng uses.
*A "new_firm" is a distinct firm/industry/msa/state combination; the missing
*option lets combinations with missing values form their own groups.
 egen new_firm = group(firm industry msac state), missing


tempfile fulldata
save `fulldata'


*Measure each firm's January 2020 workforce (input to the growth-rate measures).
*Rows with day between 1 and 31 are treated as January
*(presumably day is a day-of-year index -- TODO confirm).
 keep if inrange(day,1,31)

*Step 1: hours by firm/week/day/person
 collapse (sum) hours_worked,					///
	by(new_firm firm week day person state msac industry)

*Step 2: hours by firm/person over the whole month
 gsort new_firm firm industry msac state person
 gcollapse (sum) hours_worked,					///
	by(new_firm firm industry msac state person)

*Flag people with strictly positive January hours so they can be counted
 gen anywork = (hours_worked>0)

*Step 3: firm-level totals of hours and of workers with positive hours
 gcollapse	(sum)	total_hours_jan20=hours_worked		///
			num_workers_jan20=anywork,		///
		by(new_firm firm industry msac state)

*The result must have one row per firm
 isid new_firm firm industry msac state

tempfile jan2020
save `jan2020'



********************************************************************************
* Reopen 2020 data
********************************************************************************

*Reload the cleaned 2020 data to build the firm-level and base-period datasets
use `fulldata'


*Firm/person/week dataset: weekly hours per person, plus a worked-at-all flag
 collapse (sum) hours_worked, by(new_firm firm week person state msac industry)
 isid new_firm week person
 gen anywork = hours_worked>0
tempfile firmpersonweek
save `firmpersonweek'

*Firm/week dataset: weekly hours and number of people with positive hours
 collapse (sum) hours_worked anywork, by(new_firm firm week state msac industry)
 isid new_firm week
 rename anywork numworkers
 compress
tempfile firmweek
save `firmweek'

*Base period = weeks 4 and 5; total hours and worker-weeks per firm
 keep if inlist(week,4,5)
 collapse (sum) hours_worked numworkers, by(new_firm firm state msac industry)

*Keep only firms with at least 80 total hours across the two base weeks
 keep if hours_worked >= 80

*Convert the two-week totals into weekly averages and rename accordingly
 foreach v of varlist hours_worked numworkers {
  replace `v' = `v'/2
 }
 rename (hours_worked numworkers) (base_hours base_workers)

*Record the number of firms in the baseline sample
 unique new_firm
 local baselinecount=`r(unique)'

tempfile baseperiod
save `baseperiod'
  
  
  
********************************************************************************
* Create dataset of relevant firms
********************************************************************************

*Start from the firm/week dataset
use `firmweek'

*Restrict to firms in the base-period sample. Every firm/week row must either
*match a base-period firm or be master-only; only matched rows are kept.
 merge m:1 new_firm firm state msac industry using `baseperiod',	///
	assert(master match) keep(match) nogenerate


*Firms that shut down might not show up in the HB data in a given week.
*fillin creates the missing firm/week rows (flagged by _fillin==1) so that
*their zero hours can be observed.
 fillin new_firm week

*Within each firm, order real rows ahead of filled-in rows, then copy the
*firm-level attributes from the firm's first real row into the filled-in rows
 sort new_firm _fillin week
 foreach v of varlist firm state msac base_hours base_workers industry {
  by new_firm: replace `v' = `v'[1] if _fillin==1
 }

*Filled-in weeks have no recorded activity
 replace hours_worked = 0 if hours_worked==.
 replace numworkers   = 0 if numworkers==.

*Replace firm/week dataset with the balanced panel
tempfile firmweek
save `firmweek'


*Do firms close and open and close again?
*This section classifies each firm's open/closed history over the panel and
*saves one row per firm in the tempfile shutdown_reopened.
*A firm-week counts as "open" (anywork==1) when hours_worked is positive.
 isid new_firm week
 gsort new_firm week
 gen anywork=(hours_worked>0)
*With rows sorted by week within firm, anywork[1] is the firm's status in its
*first week; firms with no hours in that week are dropped.
 by new_firm: gen openfirstweek=anywork[1]
 keep if openfirstweek==1

*Confirm that firms are open in first week
 assert anywork==1 if week==4
 
*tsspell (user-written command) numbers consecutive runs of constant anywork
*within each firm in _spell; totspell is the firm's highest spell number.
 tsset new_firm week
 tsspell anywork
 by new_firm: egen totspell=max(_spell)
 
*Count firms that pop in and out multiple times
*NOTE(review): this counts distinct values of firm, not new_firm, and the
*locals spell3-spell11 are not referenced again in the visible code.
 forvalues spell=3(2)11 {
  unique firm if totspell>=`spell'
  loc spell`spell'=`r(unique)'
 }
 
*Count firms open whole time (open in first and last week, a single spell)
 by new_firm: gen alwaysopen=(anywork[1]==1) & (anywork[_N]==1) & (totspell==1)
 tab alwaysopen

 
*Count firms that were open, closed, and remained shut down
*(open in first week, closed in last week, exactly two spells)
 by new_firm: gen remainshutdown=(anywork[1]==1) & (anywork[_N]==0) & (totspell==2)
 tab remainshutdown

 
*Count firms that closed, reopened, closed again, and then reopened, etc
*(open in first week and at least four spells)
 by new_firm: gen pop_inout = (anywork[1]==1) & (totspell>=4)
 tab pop_inout

 
*Count firms that shutdown, reopened, and remain open
*(open in first and last week with at least three spells)
 by new_firm: gen reopened = (anywork[1]==1) & (anywork[_N]==1) & (totspell>=3)
 tab reopened
 
 
*Identify and keep firms that ever shutdown
*(earlier definitions based on minimum hours, kept for reference)
 *by new_firm: egen minhours=min(hours_worked)
 *by new_firm: gen evershutdown=(minhours==0)
 *gen evershutdown=(minhours==0) & inrange(week,11,14)
 
*On 10-27-20, I changed the following line to be "anywork==0" instead of "minhours==0"
*Row-level flag: the firm has zero hours in this week and the week is 11-14.
*The (max) collapse below turns it into a firm-level "ever shut down" indicator.
 gen evershutdown=anywork==0 & inrange(week,11,14)

 
/*Identify whether these "ever shutdown" firms come back. We'll identify those 
by comparing most recent week with zero hours versus latest week with non-zero 
hours.

Sorting by anywork then week puts a firm's zero-hours weeks first, so week[1]
is its earliest zero-hours week and week[_N] its latest week with positive
hours. Sorting by descending anywork puts zero-hours weeks last, so week[_N]
is the latest zero-hours week. For a firm with no zero-hours week these
subscripts fall on its first and last week instead.*/
 gsort new_firm anywork week
 by new_firm: gen minshutdownweek=week[1]
 by new_firm: gen firmmaxweek=week[_N]
 
 gsort new_firm -anywork week
 by new_firm: gen maxshutdownweek=week[_N]
 
 assert maxshutdownweek>=minshutdownweek
 

*Collapse to one row per firm, taking the maximum of each listed variable
 collapse 	(max) 	evershutdown		///
			reopened 		///
			minshutdownweek 	///
			maxshutdownweek		///
			firmmaxweek		///
			base_hours		///
			base_workers,	 	///
		by(new_firm firm industry msac state)
 isid new_firm
 
*reopened is only defined for firms that shut down in weeks 11-14
 replace reopened=. if evershutdown==0
 
tempfile shutdown_reopened
save `shutdown_reopened'
 
 
 
********************************************************************************
*Create dataset for growth rate characteristics
********************************************************************************

use `shutdown_reopened', clear

*Merge in Jan 2020 info. Firms with no January 2020 record are kept and
*assigned zero hours and zero workers; records found only in the January
*file are dropped.
 merge 1:1 new_firm firm industry msac state using `jan2020', gen(jan20_merge)
 drop if jan20_merge==2
 replace total_hours_jan20=0 if jan20_merge==1
 replace num_workers_jan20=0 if jan20_merge==1
 
*Merge in Jan 2019 data, treating non-matches the same way
 merge 1:1 firm industry msac state using `jan2019', gen(jan19_merge)
 drop if jan19_merge==2
 replace total_hours_jan19=0 if jan19_merge==1
 replace num_workers_jan19=0 if jan19_merge==1

 
*Growth rates are the change from Jan 2019 to Jan 2020 divided by the average
*of the two levels, so they lie in [-2, 2]. A firm with zero activity in
*Jan 2019 has a rate of exactly 2; the rate is missing when both levels are 0.

*Create variable to track growth rate for hours worked by firm 
 gen growth_rate_hours=	(total_hours_jan20-total_hours_jan19)	/	///
			((total_hours_jan20+total_hours_jan19)/2)

*Create variable to track growth rate for number of workers at a firm 
 gen growth_rate_workers=	(num_workers_jan20-num_workers_jan19)	/	///
				((num_workers_jan20+num_workers_jan19)/2)

				
/*Prepare the growth rate values for the logit model. Shift up growth rates by 2 
so that there are no neg values. Value labels can only be attached to integer 
values, so multiply all values by 10. The shifted variables run from 0 to 40.*/
 gen hours_rate_shifted=(growth_rate_hours+2)*10
 gen workers_rate_shifted=(growth_rate_workers+2)*10
 
 
*Make bins for growth rate variables 
*The labels represent the original values of the growth rate calculations
*
*egen cut() codes each observation with the left end of its interval and sets
*values at or above the LAST breakpoint to missing. The last breakpoint is
*therefore 41 rather than 40, so that a shifted value of exactly 40 (growth
*rate of 2) lands in the top "1.5 to 2" bin instead of being set to missing
*and dropped from the logit.

*Hours Growth Rate Bins
 egen growthrate_hours_binned=cut(hours_rate_shifted), at(0, 5, 10, 15, 20, 25, 30, 35, 41)
 label define rate_hours_labels 	0 "-2 to -1.5"	///
					5 "-1.5 to -1" 	///
					10 "-1 to -0.5" ///
					15 "-0.5 to 0" 	///
					20 "0 to 0.5"	///
					25 "0.5 to 1" 	///
					30 "1 to 1.5"	///
					35 "1.5 to 2", replace
				
 label values growthrate_hours_binned rate_hours_labels

*Firm Size Growth Rate Bins
 egen growthrate_firmsize_binned=cut(workers_rate_shifted), at(0, 5, 10, 15, 20, 25, 30, 35, 41)
 label define rate_firmsize_labels 	0 "-2 to -1.5"	///
					5 "-1.5 to -1" 	///
					10 "-1 to -0.5" ///
					15 "-0.5 to 0" 	///
					20 "0 to 0.5"	///
					25 "0.5 to 1"	///
					30 "1 to 1.5" 	///
					35 "1.5 to 2", replace
				
 label values growthrate_firmsize_binned rate_firmsize_labels
 
 
*Create bins for firm size
*The top breakpoint must exceed the largest base_workers value, otherwise the
*largest firms would get a missing bin instead of "100+". It is taken from
*the data, and never set below the previously hard-coded value of 237.
 quietly summarize base_workers
 local sizetop = max(237, floor(r(max)) + 1)
 egen firmsize_binned=cut(base_workers), at(0,11,50,100,`sizetop')
 label define firmsize_labels 0 "0-10" 11 "11-49" 50 "50-99" 100 "100+", replace
 label values firmsize_binned firmsize_labels
 
 
**************************************************************************
*Part II: Logit Models and Figures Creation
**************************************************************************


*firmprob: fit a logit of `outcome' on binned worker growth rate, binned firm
*size, industry and state; compute marginal effects; save them to disk; and
*draw and export a coefficient plot.
*
*Options:
*  outcome(name)          dependent variable of the logit
*  title(string)          graph title
*  xaxrange1(real)        first x-axis label value
*  xaxrange2(real)        last x-axis label value
*  xaxstep(real)          spacing between x-axis labels
*  exportfile(string)     file passed to -gr export-
*  grexportfile(string)   file passed to -gr save-
*  estexportfile(string)  text passed verbatim to -estimates save-; the callers
*                         in this file put ", replace" inside this string
*  grexportfilenolab()    optional; parsed but not used in the program body
*
*The regressors and the at() values used for the marginal effects are
*hard-coded; the program operates on the dataset currently in memory.
cap program drop firmprob
program def firmprob 

 syntax, 	outcome(name) 					///
		title(string)					///
		xaxrange1(real) xaxrange2(real) xaxstep(real)	///
		exportfile(string)				///
		grexportfile(string)				///
		estexportfile(string)				///
		[grexportfilenolab(string)]


*Logit with base categories 15 for the growth-rate bin and 7 for industry.
*The stored copy logit_results is not referenced again inside this program.
 logit `outcome' 	ib15.growthrate_firmsize_binned 	///
			i.firmsize_binned			///
			ib7.industry				///
			i.state		
 estimates store logit_results
  
*Marginal effects of the size, industry and growth-rate categories, evaluated
*at the listed at() values with remaining covariates at their means. The post
*option makes the margins results the active estimates, so these (not the
*logit coefficients) are what -estimates save- writes to disk.
 eststo marg: margins ,						///
			dydx(   firmsize_binned 		///
				industry			///
				growthrate_firmsize_binned	///
				) 				///
			noestimcheck post atmeans 		///
			at( 	firmsize_binned=0			///
				growthrate_firmsize_binned=0		///
				state=5					///
				industry=7				///
				)
  estimates save `estexportfile'				
  
*Coefficient plot of the firm-size and growth-rate marginal effects only
*(keep() excludes industry). Each group is plotted twice: once with
*baselevels and hollow markers, once without. transform() multiplies the
*estimates by 100 so that the axis is in percentage points, and addplot
*prints the value above each non-zero point.
  coefplot 	(marg,							///
				msymbol(circle_hollow)			///
				color("196 130 14")			///
				ciopts(color("196 130 14"))		///
				keep(*.firmsize_binned)			///
				baselevels)				///
		(marg, 							///
				color("196 130 14")			///
				ciopts(color("196 130 14"))		///
				keep(*.firmsize_binned))		///
		(marg,							///
				msymbol(circle_hollow)			///
				color("0 50 98")			///
				ciopts(color("0 50 98"))		///
				keep(*.growthrate_firmsize_binned)	///
				baselevels)				///
		(marg,							///
				color("0 50 98")			///
				ciopts(color("0 50 98"))		///
				keep(*.growthrate_firmsize_binned)) 	///
		, 							///
		headings(	0.firmsize_binned=			///
					"{bf:Firm Size}" 		///
				0.growthrate_firmsize_binned=		///
					"{bf:Growth Rate}"		///
				1.industry=				///
					"{bf:Industry}"			///
				, 					///
				wrap(24) labs(medsmall))		///		
		fcolor(*.8)  	        				///
		format(%9.1f)  nooffset                         	///
		legend(off) grid(none)		        		///
		transform(*=@*100)		       			///
		xtitle("Marginal effect (p.p.)", size(medium))		///
		xsc(titlegap(*5))					///
		ylab(,labs(medsmall))		        		///
		xlab(`xaxrange1'(`xaxstep')`xaxrange2', 		///
			grid gstyle(minor) labs(medsmall))		///
		xline(0, lp(dash) lc(red) lw(vthin))			///
		scheme(s1color)                         		///
		plotregion(margin(b=2))					///
		ti(`title', size(medium))				///
	addplot(scatter @at @b if @b!=0, 		        	///
			ms(i) mlabel(@b) mlabpos(12)			///
			mlabcolor(black) mlabsize(small))

*Save the graph in Stata format and export it to the requested image file
gr save 	`grexportfile', replace	
gr export 	`exportfile', replace

 
 end
 
 
 *Panel A: probability of shutting down
 firmprob,								///
	outcome(evershutdown)						///
	title("Panel A: Shutdown")					///
	xaxrange1(-40) xaxrange2(40) xaxstep(20)			///
	estexportfile("`results'/estimates/firmlevel_ests_evershutdown_oct25.ster", replace)	///
	grexportfile("`results'/figures/firmlevel_prob_shutdown_oct25.gph")	///
	exportfile("`results'/figures/firmlevel_prob_shutdown_oct25.png")

 *Panel B: probability of reopening
 firmprob,								///
	outcome(reopened)						///
	title("Panel B: Reopened")					///
	xaxrange1(-40) xaxrange2(40) xaxstep(20)			///
	estexportfile("`results'/estimates/firmlevel_ests_reopened_oct25.ster", replace)	///
	grexportfile("`results'/figures/firmlevel_prob_reopened_oct25.gph")	///
	exportfile("`results'/figures/firmlevel_prob_reopened_oct25.png")

 *Place the two saved panels side by side and export the combined figure
 graph combine							///
	"`results'/figures/firmlevel_prob_shutdown_oct25.gph"	///
	"`results'/figures/firmlevel_prob_reopened_oct25.gph",	///
	scheme(s1color) rows(1) cols(2)
 graph export "`results'/figures/firmlevel_prob_combined_oct25.png", replace
 
 
 		
********************************************************************************
*Export table of firm-level shutdown and reopening estimates
********************************************************************************

  *Reload the estimates saved by the two firmprob runs and store them under
  *short names for the table
  estimates use `results'/estimates/firmlevel_ests_evershutdown_oct25
  estimates store firmshutdown

  estimates use `results'/estimates/firmlevel_ests_reopened_oct25
  estimates store firmreopened


*Write a CSV table with the firm-size and growth-rate estimates from both
*models (point estimates and standard errors, three decimals, no stars)
  esttab firmshutdown firmreopened				///
	using "`results'/table/firmlevel_coefs_oct25.csv",	///
	keep(*.firmsize_binned *.growthrate_firmsize_binned)	///
	b(%9.3fc) se(%9.3fc) nostar replace


********************************************************************************

*Close the named log opened at the top of the file
log close firmlevel_prob_log
********************************************************************************
 