# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=2 --mem=8g --export=ALL,script=kr_1_raw.R --partition=high zb_r.sh

# Capture the command line: `sinfo` is the full invocation, `args` only the
# trailing (user-supplied) arguments. Neither is read again in this script.
# NOTE(review): presumably consumed by the sourced 0_directory.R -- confirm.
sinfo <- commandArgs(trailingOnly = FALSE)
args <- commandArgs(trailingOnly = TRUE)

# Login name: take USERNAME when it is non-empty, otherwise fall back to USER.
sys_user <- Sys.getenv("USERNAME")
if (sys_user == "") {
  sys_user <- Sys.getenv("USER")
}
# Empty string when SLURM_CLUSTER_NAME is not set.
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")
# 0 for an interactive session, 1 otherwise.
sys_batch <- if (interactive()) 0 else 1

# Locate the shared "code_function/" folder for the user running the script.
if (sys_user=="homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

# Fail early with an actionable message instead of the opaque
# "object 'dir_func' not found" that source() would otherwise raise when the
# user check above did not match. A dir_func already defined in the session is
# still honoured.
if (!exists("dir_func")) {
  stop("dir_func is not set for user '", sys_user, "'. ",
       "Edit the user check at the top of this script so that dir_func points ",
       "to the \"code_function/\" folder.", call. = FALSE)
}

# dir_proj, dir_raw and dir_clean are used below but not assigned in this
# script, so they are expected to come from the sourced 0_directory.R.
source(paste0(dir_func, "0_directory.R"))
print(paste("Started at", Sys.time()))

library(data.table)

# Record package versions in the log, then work from the project root.
print(sessionInfo())
setwd(dir_proj)

#===============================================================================

# Output folder for the cleaned miscellaneous series written by this script.
dir_misc <- paste0(dir_clean, "misc/")

#---------------------------------------
# Kronos Overall

# Read the "Overall Data" sheet. Its first column labels each row's series;
# the remaining columns are reshaped below into one row per week.
kr_overall_path <- paste0(dir_raw, "misc/Kronos Punch Data since Jan 2020_07-12-20.xlsx")
dt_kr <- data.table(readxl::read_excel(kr_overall_path,
                                       sheet = "Overall Data", col_names = TRUE))
setnames(dt_kr, names(dt_kr)[1], "var")

# Replace the spreadsheet's row labels with short variable names.
series_codes <- c(">100 employees" = "nemp_g100",
                  "<100 employees" = "nemp_l100",
                  "Total Punches"  = "nemp")
for (series_label in names(series_codes)) {
  dt_kr[var == series_label, var := series_codes[[series_label]]]
}

# Long format: one row per (series, week column).
dt_kr <- melt(dt_kr, id.vars = "var", variable.name = "weekd",
              variable.factor = FALSE, value.name = "val")

# The week column names are coerced to integers and rebased so that the
# earliest one maps to 2020-01-05; later weeks keep their day offsets.
# NOTE(review): presumably the headers are Excel date serials -- confirm.
dt_kr[, weekd := as.integer(weekd)]
dt_kr[, weekd := weekd - min(weekd) + as.Date("2020-01-05")]

# Back to wide format: one row per week, one column per series.
dt_kr <- dcast(dt_kr, weekd ~ var, value.var = "val")
dt_kr[, date := as.Date(weekd)]

# For each series, store the mean over the two baseline weeks ("<series>b")
# and the series relative to that baseline ("remp<suffix>").
base_weeks <- as.Date(c("2020-01-26", "2020-02-02")) # Dates are end of week
for (suffix in c("_g100", "_l100", "")) {
  col_level <- paste0("nemp", suffix)
  col_base <- paste0(col_level, "b")
  col_ratio <- paste0("remp", suffix)
  base_mean <- mean(dt_kr[date %in% base_weeks, ][[col_level]])
  dt_kr[, (col_base) := base_mean]
  dt_kr[, (col_ratio) := get(col_level) / get(col_base)]
}

saveRDS(dt_kr, paste0(dir_misc, "kronos_overall.rds"))

#---------------------------------------
# Kronos by State

# Prefer the workbook under dir_raw (the same file name the "Kronos Overall"
# section reads) so the script also runs from a relocated archive; fall back
# to the original absolute location when that copy is absent.
kr_file <- "Kronos Punch Data since Jan 2020_07-12-20.xlsx"
kr_state_path <- paste0(dir_raw, "misc/", kr_file)
if (!file.exists(kr_state_path)) {
  kr_state_path <- paste0("/accounts/projects/jrothst/homebase/covid/rawdata/", kr_file)
}

# The "Punch Data" sheet must have exactly four columns, renamed here in order.
dt_kr <- data.table(readxl::read_excel(kr_state_path, sheet = "Punch Data"))
setnames(dt_kr, names(dt_kr), c("state_reg","st2_str","nemp","weekd"))
dt_kr[, weekd := as.Date(weekd)]
dt_kr[, st2_str := toupper(st2_str)]

# Attach state identifiers from the crosswalk, matching on the upper-cased
# two-letter code. Unmatched codes are printed to the log and then dropped.
dt_kr <- merge(dt_kr[,c("st2_str","weekd","nemp")],
               readRDS(paste0(dir_clean,"cw/cw_geo_nber_state.rds"))[,c("st","stfips","st2_str")],
               by = c("st2_str"), all.x = TRUE)
print(unique(dt_kr[is.na(st),c("st2_str")]))
dt_kr <- dt_kr[!is.na(st),]

# Per-state baseline: mean of nemp over the two baseline weeks (NAs ignored).
# States with no baseline rows get NA for nempb and hence for remp.
base_weeks <- as.Date(c("2020-01-26","2020-02-02"))
dt_krb <- dt_kr[weekd %in% base_weeks, .(nempb = mean(nemp, na.rm = TRUE)), by = c("st")]
dt_kr <- merge(dt_kr, dt_krb, by = c("st"), all.x = TRUE)

# Level relative to the state's baseline, and its natural log
# (-Inf wherever remp is 0).
dt_kr[, remp := nemp / nempb]
dt_kr[, ln_remp := log(remp)]

saveRDS(dt_kr, paste0(dir_misc,"kronos_state.rds"))

print(paste("Ended at", Sys.time()))
# End of R script
