# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=8 --mem=16g --export=ALL,script=ppp_2_stc.R --partition=high zb_r.sh

sinfo <- commandArgs(trailingOnly = F)
args <- commandArgs(trailingOnly = T)

sys_user <- ifelse(Sys.getenv("USERNAME")!="", Sys.getenv("USERNAME"), Sys.getenv("USER"))
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")
sys_batch <- ifelse(interactive(), 0, 1)

if (sys_user=="homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

source(paste0(dir_func, "0_directory.R"))
print(paste("Started at", Sys.time()))

library(data.table)

print(sessionInfo())
setwd(paste0(dir_proj))

#===============================================================================

dir_ppp <- paste0(dir_clean,"ppp/")
dir_cen <- paste0(dir_clean,"census/")

dt_ppp <- readRDS(paste0(dir_ppp,"ppp_clean.rds"))
dt_ppp[,naics:=as.character(naics)]
dt_cbp <- readRDS(paste0(dir_cen,"cbp_2018_st.rds"))

#===============================================================================
# Subset NAICS 44 Retail Trade and 72 Accommodation and Food Services

dt_ppps <- dt_ppp[grepl("^(44|72)",naics) & !is.na(loan) & date_approval<=as.Date("2020-04-16")] # Keep only <150k & approved by 4/16
dt_cbps <- dt_cbp[grepl("^(44|72)----",naics) & !grepl("Government|All Est",as.character(lfo))] # Keep only non-gov

dt_pppa <- dt_ppps[, lapply(.SD,sum,na.rm=T), by=c("st","stfips"), .SDcols=c("loan")][stfips<=56]
setnames(dt_pppa, "loan", "loan_sel")

lsize <- c("1_5$","5_9$","10_19$","20_49$")
dt_cbpa <- dt_cbps[, lapply(.SD,sum,na.rm=T), by=c("st","stfips"), 
                   .SDcols=patterns(paste(paste(paste0("e",lsize),collapse = "|"),paste(paste0("q",lsize),collapse = "|"),sep="|"))][stfips<=56]
dt_cbpa[, emp_sel:=e1_5+e5_9+e10_19+e20_49]
dt_cbpa[, pay_sel:=q1_5+q5_9+q10_19+q20_49]

dt_out <- merge(dt_pppa,dt_cbpa[,c("st","stfips","emp_sel","pay_sel")], by=c("st","stfips"), all=T)
dt_out[, loanr:=loan_sel/pay_sel]
setorderv(dt_out,"loanr")
dt_out[, loanperc:=.I/.N]
dt_out[, loanc:=cut(loanperc,c(0,0.25,0.5,0.75,1),labels = c("Q1","Q2","Q3","Q4"))]

dt_out1 <- copy(dt_out)[,c("st","stfips","loan_sel","pay_sel","loanc")]

#===============================================================================
# Aggregate number from Marianne

dt_raw <- data.table(readxl::read_excel(paste0(dir_raw,"ppp/PPP.xlsx")))
dt_st <- readRDS(paste0(dir_clean,"cw/cw_geo_nber_state.rds"))[,State:=toupper(st2_str)]

setnames(dt_raw, c("...1","payroll_nonfarm_april2019","approved_ppp_loans_0416","approved_ppp_amount_0416",
                   "approved_ppp_loans_0516","approved_ppp_amount_0516","approved_ppp_loans_0523","approved_ppp_amount_0523"),
         c("st_str","payrollnf_201904","loann_0416","loana_0416","loann_0516","loana_0516","loann_0523","loana_0523"))

dt_out <- merge(dt_raw[!is.na(st_str)], dt_st[,c("stfips","st","st2","st_str")], by=c("st_str"), all.x=T)[,st_str:=NULL]

setcolorder(dt_out, c("st","st2","stfips"))

# Divide states into four groups based on ppp (keep only states + DC)
dt_out <- dt_out[stfips<=56]

for (ippp in c("0416","0516","0523")) {
  vppp <- paste0("loana_",ippp)
  vpppc <- paste0("loanac_",ippp)
  dt_out[,pppr:=get(vppp)/payrollnf_201904]
  setorderv(dt_out,c("pppr"))
  dt_out[, pppperc:=.I/.N]
  dt_out[, (vpppc):=cut(pppperc,c(0,0.25,0.5,0.75,1),labels = c("Q1","Q2","Q3","Q4"))]
  dt_out[, c("pppr","pppperc"):=NULL]
}

# Add category for UI as well

lui <- factor(c(1:4))
levels(lui) <- c("125-141","141-152","152-161","161-177")
dt_out[as.integer(st) %in% c(2,5,6,12,19,23,28,29,36,39,41,43,46), uic:=lui[2]]
dt_out[as.integer(st) %in% c(10,13,15,17,20,26,35,38,42,44,45,50,51), uic:=lui[3]]
dt_out[as.integer(st) %in% c(1,4,8,11,16,18,25,27,32,34,37,49), uic:=lui[4]]
dt_out[is.na(uic), uic:=lui[1]]

dt_out[st=="DC", uic:=NA]

dt_out2 <- copy(dt_out)[,-c("st2")]

# Update 20201024: New UI Replacement Rate
dt_ui <- fread(paste0(dir_raw,"misc/rep_rate_state.csv"))
dt_ui <- dt_ui[state!="US"]
dt_ui[,rrq:=cut(100*rr_fpuc, quantile(100*rr_fpuc, probs = c(0,0.25,0.5,0.75,1)),include.lowest = TRUE)]
tlvl <- levels(dt_ui$rrq)
tlvl <- gsub("\\[|\\]|\\(|\\)","",tlvl)
tlvl <- gsub(",","-",tlvl)
setattr(dt_ui$rrq,"levels",tlvl)
dt_out2 <- merge(dt_out2[,st_str:=as.character(st)],dt_ui[,c("state","rrq")], by.x="st_str", by.y="state", all.x=T)

dt_dev <- dt_out2[as.numeric(uic)!=as.numeric(rrq),c("st","st_str","uic","rrq")]
dt_dev[,uicn:=as.numeric(uic)]
dt_dev[,rrqn:=as.numeric(rrq)]
dt_dev <- merge(dt_dev, data.table(readxl::read_excel(paste0(dir_raw,"misc/quartile_changes.xlsx"))), by.x="st_str", by.y="state", all=T)
setnames(dt_dev,c("previous quartile", "current quartile"),c("q0","q1"))
print(dt_dev[q0!=uicn|q1!=rrqn|is.na(q0)|is.na(q1)])

dt_out2[, st_str:=NULL]
setnames(dt_out2,c("uic","rrq"),c("uic_old","uic"))

#===============================================================================
# Export

dt_out <- merge(dt_out1, dt_out2, by=c("st","stfips"), all=T)

setorderv(dt_out, "stfips")
saveRDS(dt_out,paste0(dir_ppp,"ppp_stc_sel.rds"))
haven::write_dta(dt_out,paste0(dir_ppp,"ppp_stc_sel.dta"))

print(paste("Ended at", Sys.time()))
# End of R script
