# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=1 --mem=4g --export=ALL,script=cw_date_st_reg.R zb_r.sh

# Command line of this R session: `sinfo` is the full invocation, `args` only
# the trailing (user-supplied) arguments.
sinfo <- commandArgs(trailingOnly = FALSE)
args <- commandArgs(trailingOnly = TRUE)

# Current user name: the USERNAME environment variable if it is non-empty,
# otherwise USER.
sys_user <- if (Sys.getenv("USERNAME") != "") {
  Sys.getenv("USERNAME")
} else {
  Sys.getenv("USER")
}
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")
# 0 when R runs interactively, 1 otherwise.
sys_batch <- if (interactive()) 0 else 1

if (sys_user == "homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

# Stop with an actionable message when the username check above did not set
# `dir_func`; otherwise source() below fails with a bare "object not found".
if (!exists("dir_func")) {
  stop(
    "`dir_func` is not defined for user '", sys_user, "'. ",
    "Edit the username check above and point `dir_func` to the ",
    "\"code_function/\" folder.",
    call. = FALSE
  )
}

# Load the project directory definitions (the script relies on `dir_proj`,
# `dir_clean` and `dir_raw` being available after this call).
source(paste0(dir_func, "0_directory.R"))
print(paste("Started at", Sys.time()))

library(data.table)
library(haven)

# Record R and package versions in the log.
print(sessionInfo())

setwd(dir_proj)

#===============================================================================

# State crosswalk: keep `st` plus the `st2_str` column, exposed under the name
# `state_str` so it can serve as the merge key for the date sources below.
cw_path <- paste0(dir_clean, "cw/cw_geo_nber_state.rds")
dt_cw <- readRDS(cw_path)
dt_cw[, `:=`(state_str = st2_str)]
dt_cw <- dt_cw[, c("st", "state_str"), with = FALSE]

#-------------------------------------------------------------------------------
# Stay-at-home order

#---------------------------------------
# Dates collected by Rustandy Center

# Read the "State" sheet of the regulations workbook; the earlier 6_17_20
# vintage is kept commented out for reference.
# dt_raw <- data.table(readxl::read_excel(paste0(dir_raw, "date/Covid19 Regulations in the US 6_17_20.xlsx"), sheet="State"))
dt_raw <- data.table(
  readxl::read_excel(
    paste0(dir_raw, "date/Covid19 Regulations in the US 7_17_20.xlsx"),
    sheet = "State"
  )
)

# Keep the six columns of interest and give them short names.
dt_raw <- dt_raw[
  ,
  c("Name", "State", "Release Date", "Duration of the Policy", "Extended Until", "Reopening"),
  with = FALSE
]
setnames(dt_raw, c("stay", "state_str", "stayrel", "stayend1", "stayend2", "reop"))

# Correct the misspelled state name found in the source sheet.
dt_raw[state_str == "Tenessee", state_str := "Tennessee"]

# Release and reopening columns are converted directly to Date.
dt_raw[, `:=`(stayrel = as.Date(stayrel), reop = as.Date(reop))]

# The two end-date columns are cast to integer and read as day counts from
# 1899-12-30 (presumably Excel serial dates -- confirm against the workbook).
dt_raw[
  ,
  c("stayend1", "stayend2") := lapply(
    .SD,
    function(serial) as.Date(as.integer(serial), origin = "1899-12-30")
  ),
  .SDcols = c("stayend1", "stayend2")
]

# Diagnostic: number of rows whose first end date is not before the second.
dt_raw[stayend1 >= stayend2, .N]

# Single end date: the second end date when present, otherwise the first one.
dt_raw[, stayend := stayend2]
dt_raw[is.na(stayend), stayend := stayend1]
dt_raw[, `:=`(stayend1 = NULL, stayend2 = NULL)]

# Manual override of the end date for South Carolina.
dt_raw[state_str == "South Carolina", stayend := as.Date("2020-04-27")]

# Manually entered rows for jurisdictions appended to the sheet data.
dt_rawa <- data.table(
  state_str = c("District of Columbia", "Virgin Islands", "Guam"),
  stayrel = as.Date(c("2020-03-30", "2020-03-23", NA)),
  stayend = as.Date(c("2020-05-15", NA, NA)),
  reop = as.Date(c("2020-05-29", NA, NA))
)
dt_raw <- rbind(dt_raw, dt_rawa, fill = TRUE)

# Upper-case the names before joining to the crosswalk on `state_str`.
dt_raw[, state_str := toupper(state_str)]
dt_raw <- merge(dt_raw, dt_cw, by = "state_str", all.x = TRUE)

# Number of rows left without a crosswalk match.
print(dt_raw[is.na(st), .N])

#---------------------------------------
# Dates collected by NASHP

# Load the NASHP file and keep the state name, the stay-at-home indicator and
# the free-text effective-date range.
dt_nashp <- fread(paste0(dir_raw, "date/date_stay_at_home_state_nashp.csv"))
dt_nashp <- dt_nashp[
  ,
  c("State", "Was a stay-at-home order issued?", "Effective dates"),
  with = FALSE
]
setnames(dt_nashp, c("state_str", "is_stay", "dater_stay"))
dt_nashp[, state_str := toupper(state_str)]

# Split the range on " - " into start and end pieces, then keep only the
# leading "<word> <digits>" part of each piece.
dt_nashp[, c("dstay0", "dstay1") := tstrsplit(dater_stay, " - ")]
dt_nashp[
  ,
  c("dstay0", "dstay1") := lapply(.SD, stringr::str_extract, pattern = "^(\\w+ \\d+)"),
  .SDcols = c("dstay0", "dstay1")
]

# Manual fixes (applied before the text is parsed into dates)
dt_nashp[state_str == "WISCONSIN", dstay1 := "May 13"]

# Append the year 2020 and parse the text as "<month name> <day>, <year>".
dt_nashp[
  ,
  c("dstay0", "dstay1") := lapply(
    .SD,
    function(month_day) as.Date(paste0(month_day, ", 2020"), format = "%B %d, %Y")
  ),
  .SDcols = c("dstay0", "dstay1")
]

#---------------------------------------
# Update end of stay-at-home order with NASHP data

# Full outer join of the workbook dates with the NASHP dates on `state_str`.
dt_rawu <- merge(
  dt_raw,
  dt_nashp[, c("state_str", "is_stay", "dstay0", "dstay1"), with = FALSE],
  by = "state_str",
  all = TRUE
)

# Rows that have a release date and whose end date is either earlier than the
# NASHP end date or missing while NASHP provides one. Comparisons that
# evaluate to NA do not select a row.
rows_extend <- dt_rawu[
  ,
  which(!is.na(stayrel) & (stayend < dstay1 | (is.na(stayend) & !is.na(dstay1))))
]

# Log the affected rows, then take the NASHP end date for them.
print(dt_rawu[
  rows_extend,
  c("st", "state_str", "stayrel", "stayend", "dstay0", "dstay1"),
  with = FALSE
])
dt_rawu[rows_extend, stayend := dstay1]

# Final date columns under their output names.
dt_rawu[, `:=`(
  dstay = as.Date(stayrel),
  dstay_end = as.Date(stayend),
  dreop = as.Date(reop)
)]
dt_out <- dt_rawu[, c("st", "dstay", "dstay_end", "dreop"), with = FALSE]

#---------------------------------------
# Create state categories based on dates

# Timing category of the stay-at-home start date (`dstay`):
#   1 = March 22 or before, 2 = March 23-30, 3 = March 31 or later,
#   4 = no start date (never)
dt_out[dstay <= as.Date("2020-03-22"), stayc := 1]
dt_out[dstay >= as.Date("2020-03-23") & dstay <= as.Date("2020-03-30"), stayc := 2]
dt_out[dstay >= as.Date("2020-03-31"), stayc := 3]
dt_out[is.na(dstay), stayc := 4]

# Labels are attached through explicit `levels`/`labels` so that each code
# always maps to its own label, even when a category has no rows (overwriting
# the "levels" attribute of factor(stayc) would shift the labels in that case).
# NOTE: the second label previously read "3/23-2/30" (typo for March 23-30);
# downstream code matching on the old string needs the same correction.
dt_out[, stayc := factor(
  stayc,
  levels = 1:4,
  labels = c("3/22 or Earlier", "3/23-3/30", "3/31 or Later", "Never")
)]

# Whether the state is reopening:
#   1 = has a start date but no reopening date, 2 = has both,
#   3 = no start date
dt_out[!is.na(dstay) & is.na(dreop), reopc := 1]
dt_out[!is.na(dstay) & !is.na(dreop), reopc := 2]
dt_out[is.na(dstay), reopc := 3]
dt_out[, reopc := factor(
  reopc,
  levels = 1:3,
  labels = c("Still Stay-at-home", "Reopening", "Never Stay-at-home")
)]

# Write the final table in three formats (RDS, Stata and CSV) to the clean
# data folder, then log the finish time.
out_rds <- paste0(dir_clean, "date/cw_date_st_reg.rds")
out_dta <- paste0(dir_clean, "date/cw_date_st_reg.dta")
out_csv <- paste0(dir_clean, "date/cw_date_st_reg.csv")

saveRDS(dt_out, file = out_rds)
write_dta(dt_out, path = out_dta)
fwrite(dt_out, file = out_csv)

print(paste("Ended at", Sys.time()))
# End of R script
