# NOTE(review): this "Ended at" timestamp is printed before any of the work
# below runs; presumably it closes a preceding step -- confirm.
print(paste("Ended at", Sys.time()))
## [1] "Ended at 2020-07-18 11:32:12"

# Load project helper scripts. Paths are built with paste0(), so `dir_func`
# (defined outside this section) is expected to end with a path separator.
source(paste0(dir_func, "f_readhb.R"))
source(paste0(dir_func, "f_wk.R"))
source(paste0(dir_func, "f_ratio.R"))

# Firm-level data: keep the selected firms, drop the "_2wk" suffix from the
# size/hours columns, and add a constant `nfirm` so firm counts can be obtained
# by summing.
dt_firm <- readRDS(paste0(dir_clean, "homebase_sel_firm_ind_geo_2020.rds"))
dt_firm <- dt_firm[firm_base_2wk == 1 & st_sel_2wk == 1, ] # Keep only selected firms.
setnames(dt_firm, c("size_2wk", "sizec_2wk", "hours_2wk"), c("size", "sizec", "hours"))
dt_firm[, nfirm := 1]

# Keep only states + DC
# The state crosswalk is left-joined on `st`; firms without a match get NA in
# `stfips` and are dropped by the filter below together with codes above 56
# (data.table treats NA in a row filter as FALSE).
dt_st <- readRDS(paste0(dir_clean, "cw/cw_geo_nber_state.rds"))
dt_firm <- merge(
  dt_firm,
  dt_st[, c("st", "stfips", "st_str", "st2_str", "state_reg", "state_div")],
  by = c("st"),
  all.x = TRUE
)
dt_firm <- dt_firm[stfips <= 56, ]

# dt_statediv <- readRDS(paste0(dir_clean, "cw_geo_state.rds"))[,c("st","st_str","st2_str","state_reg","state_div")]
# dt_statediv <- dt_statediv[!st_str %in% c("AS","MP","NOT USA","UNCLASSIFIED"), ]
# dt_firm <- merge(dt_firm, dt_statediv, by=c("st"), all=T) # Keep VT from div

# Character copy of the crosswalk's `st2_str`; used further down as the key
# when merging with the BLS employment table.
dt_firm[, state_str := as.character(st2_str)]

#-------------------------------------------------------------------------------
# Function for horizontal bar charts

# Draw a horizontal bar chart with one bar per value of `vx`.
#
# Arguments:
#   dt_in  - data passed to ggplot().
#   vx     - string naming the category variable (it also drives the fill
#            colour); passed through aes_string(), so expression strings work.
#   vy     - string naming the variable that gives the bar length.
#   tx, ty - axis labels for `vx` and `vy` respectively.
#   ttitle - accepted for backward compatibility but not used: the plot is
#            drawn without a title.
#            NOTE(review): decide whether a title should actually be drawn.
#   ggadd  - extra ggplot component(s) added right after the global `ggtheme`.
#
# Returns the ggplot object. Relies on a global `ggtheme` defined elsewhere.
f_hbar <- function(dt_in, vx, vy, tx="", ty="", ttitle="", ggadd=theme()) {
  ggplot(dt_in, aes_string(x = vx, y = vy, fill = vx)) + ggtheme + ggadd +
    geom_bar(stat = "identity", position = "dodge") + coord_flip() +
    scale_fill_brewer(palette = "Set3") +
    theme(panel.grid.major.y = element_blank()) +
    # labs() only sets labels; the stray `legend.position` argument that used
    # to be passed here had no effect on the legend and has been removed.
    labs(x = tx, y = ty) +
    # The fill legend would only repeat the axis categories, so hide it.
    # "none" replaces the deprecated `FALSE`.
    guides(fill = "none")
}
#-------------------------------------------------------------------------------
# Firm size

# Bin firms by hours / 80 (presumably two-week hours converted to full-time
# equivalents -- confirm) and compute each bin's share of firms and of hours.
dt_sum <- copy(dt_firm)
# Bins are right-closed: (0,5], (5,10], (10,20], (20,50], (50,Inf]. Firms with
# zero or missing hours fall outside every bin and get NA.
dt_sum[, hours_cat := cut(
  hours / 80,
  breaks = c(0, 5, 10, 20, 50, Inf),
  labels = c("1-5", "6-10", "11-20", "21-50", "50+")
)]

dt_fig <- dt_sum[, lapply(.SD, sum, na.rm = TRUE), by = c("hours_cat"), .SDcols = c("nfirm", "hours")]
# The denominators include the NA bin, so the shares shown below need not sum
# to one once that bin is filtered out.
dt_fig[, sfirm := nfirm / sum(nfirm)]
dt_fig[, shours := hours / sum(hours)]

# Share of firms
ggplot(dt_fig[!is.na(hours_cat), ], aes(x = hours_cat, y = sfirm, fill = hours_cat)) + ggtheme +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_brewer(palette = "Set3") +
  theme(panel.grid.major.x = element_blank()) +
  # labs() only sets labels; the stray `legend.position` argument that used to
  # be passed here had no effect and has been removed.
  labs(x = "Equivalent # of Full-Time Employees", y = "Share of Firms") +
  # Hide the fill legend ("none" replaces the deprecated `FALSE`).
  guides(fill = "none")

# ggsave() without `plot =` writes the most recently created plot.
ggsave(paste0(dir_ofig, "PA_firm_rep_size.png"), width = 9, height = 6, dpi = 300)
fwrite(dt_fig[!is.na(hours_cat), ], paste0(dir_ofigd, "PA_firm_rep_size.csv"))
#-------------------------------------------------------------------------------
# Industry

# Firm and hours totals per industry, with shares and rounded percent labels.
dt_sum <- dt_firm[, lapply(.SD, sum, na.rm = TRUE), by = c("ind"), .SDcols = c("nfirm", "hours")]
# Shares are computed before the missing-industry row is dropped, so that row
# still counts towards the denominators.
dt_sum[, sfirm := nfirm / sum(nfirm)]
dt_sum[, shours := hours / sum(hours)]
dt_sum[, sfirm_lab := paste0(round(sfirm * 100), "%")]
dt_sum[, shours_lab := paste0(round(shours * 100), "%")]
dt_sum <- dt_sum[!is.na(ind), ]

# Bar plot
# Sort by firm share and fix the factor level order to that sort order, so the
# bars are drawn in order of share rather than alphabetically. `ind` is the
# grouping key and therefore unique, which makes it a valid level set.
setorder(dt_sum, sfirm)
dt_sum[, indf := factor(as.character(ind), levels = as.character(ind))]

f_hbar(dt_sum, vx="indf", vy="sfirm", tx="", ty="Share of Firms", ttitle="Figure 7: Distribution Across Industries")

# ggsave() without `plot =` writes the most recently created plot.
ggsave(paste0(dir_ofig, "PA_firm_rep_ind.png"), width = 12, height = 6, dpi = 300)
fwrite(dt_sum, paste0(dir_ofigd, "PA_firm_rep_ind.csv"))
#-------------------------------------------------------------------------------
# Census Region

# Compare the regional distribution of the Homebase sample with BLS state
# employment for January 2020.

# The workbook is read without a header row, so the columns receive positional
# names (...1, ...2, ...), as echoed below.
dt_emp <- as.data.table(readxl::read_excel(paste0(dir_raw, "bls/ststdnsadata.xlsx"), col_names = FALSE))
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
setnames(dt_emp, c("...1", "...2", "...3", "...4", "...8"), c("fips", "state_str", "year", "mon", "emp"))
dt_emp <- dt_emp[, c("fips", "state_str", "year", "mon", "emp")]
# Drop the first 8 rows (presumably the spreadsheet's title/header rows --
# confirm against the file). A negative index is used instead of 9:nrow(),
# which would run backwards on a table with fewer than 9 rows.
dt_emp <- dt_emp[-seq_len(8L)]
# All columns are still text at this point, hence the string comparisons.
dt_emp <- dt_emp[year == "2020" & mon == "01"]
# Keep rows with a two-character FIPS code (presumably this removes sub-state
# areas that carry longer codes -- confirm).
dt_emp <- dt_emp[nchar(fips) == 2, ]
dt_emp[, emp := as.numeric(emp)]
dt_emp[, semp := emp / sum(emp)]
# Upper-case the name so it can be matched to `state_str` in the firm data.
dt_emp[, state_str := toupper(state_str)]

# Homebase measure compared against BLS employment: "sfirm" or "shours".
ivar <- "sfirm"

# State-level totals, full-joined with BLS employment so that states present in
# only one of the two sources are kept.
dt_sum <- dt_firm[, lapply(.SD, sum, na.rm = TRUE), by = c("st", "state_str", "state_div", "state_reg"), .SDcols = c("nfirm", "hours")]
dt_sum <- merge(dt_sum, dt_emp, by = c("state_str"), all = TRUE)
dt_sum[is.na(hours), hours := 0]
dt_sum[, shours := hours / sum(hours)]
dt_sum[, dshare := shours - semp]

# Aggregate to census regions. Rows that came only from the BLS side have no
# `state_reg` and are dropped here.
dt_sum2 <- dt_sum[, lapply(.SD, sum, na.rm = TRUE), by = c("state_reg"), .SDcols = c("nfirm", "hours", "emp")]
dt_sum2 <- dt_sum2[!is.na(state_reg), ]
# A zero total can only arise from summing with na.rm = TRUE over missing
# values, so treat it as missing rather than as zero employment.
dt_sum2[emp == 0, emp := NA_real_]
dt_sum2[, sfirm := nfirm / sum(nfirm, na.rm = TRUE)]
dt_sum2[, shours := hours / sum(hours, na.rm = TRUE)]
dt_sum2[, semp := emp / sum(emp, na.rm = TRUE)]
dt_sum2[, dshare := shours - semp]

# Long format: one row per region and measure (the chosen Homebase share and
# the BLS employment share).
dt_sum2 <- melt(dt_sum2[, .SD, .SDcols = c("state_reg", ivar, "semp")], id.vars = "state_reg", variable.name = "var", value.name = "share")
# Numeric codes fix the legend order (Homebase first, BLS second) before the
# levels are given readable labels.
dt_sum2[var == "semp", varf := 2]
dt_sum2[var == ivar, varf := 1]
dt_sum2[, varf := factor(varf)]
if (ivar == "sfirm") {
  setattr(dt_sum2$varf, "levels", c("# of Firms from Homebase", "Employment from BLS"))
} else if (ivar == "shours") {
  setattr(dt_sum2$varf, "levels", c("Hours from Homebase", "Employment from BLS"))
}

# Regions are reversed so that, after coord_flip(), the first level is drawn at
# the top. aes() takes the expression directly, replacing the deprecated
# aes_string() with a string-encoded expression.
ggplot(dt_sum2[!is.na(state_reg), ],
       aes(x = forcats::fct_rev(state_reg), y = share, fill = varf)) + ggtheme +
  geom_bar(stat = "identity", position = "dodge") + coord_flip() +
  scale_fill_brewer(palette = "Paired") +
  theme(panel.grid.major.y = element_blank(), legend.position = "bottom") +
  labs(x = "", y = "Share", fill = "") + guides(fill = guide_legend(reverse = TRUE))

# ggsave() without `plot =` writes the most recently created plot.
ggsave(paste0(dir_ofig, "PA_firm_rep_region.png"), width = 12, height = 6, dpi = 300)
fwrite(dt_sum2[!is.na(state_reg), ], paste0(dir_ofigd, "PA_firm_rep_region.csv"))
print(paste("Ended at", Sys.time()))
## [1] "Ended at 2020-07-18 11:32:20"
# End of R script