# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=8 --mem=32g --export=ALL,script=data_1_raw_dta.R --partition=high zb_r.sh

# ---- Session setup -----------------------------------------------------------
# Capture the full command line and the user-supplied trailing arguments.
# Neither is used further in this script; they are kept in the global
# environment and the trailing arguments are echoed to the job log.
sinfo <- commandArgs(trailingOnly = FALSE)
args <- commandArgs(trailingOnly = TRUE)

print(args)

# Current user name: USERNAME if it is set, otherwise USER.
sys_user <- if (Sys.getenv("USERNAME") != "") {
  Sys.getenv("USERNAME")
} else {
  Sys.getenv("USER")
}
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")
# 1 when run non-interactively (batch), 0 in an interactive session.
sys_batch <- if (interactive()) 0 else 1

if (sys_user == "homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

# Fail with an actionable message instead of "object 'dir_func' not found"
# when the user-specific branch above did not run and nothing else set it.
if (!exists("dir_func")) {
  stop(
    "dir_func is not defined for user '", sys_user, "'. ",
    "Edit the user check above so that it matches the current username and ",
    "points to the \"code_function/\" folder.",
    call. = FALSE
  )
}

# 0_directory.R presumably defines the dir_* paths used below (dir_proj,
# dir_clean, dir_cleanl) and may read the sys_* variables set above --
# TODO confirm against that file.
source(paste0(dir_func, "0_directory.R"))
print(paste("Started at", Sys.time()))

library(data.table)
library(haven)

# Record R and package versions in the log.
print(sessionInfo())

setwd(paste0(dir_proj))

#===============================================================================
# Load raw data

# Raw records restricted to events dated before 2020-01-01.
# Note: this file is read from dir_cleanl, while the crosswalk below is read
# from dir_clean.
dt_raw <- readRDS(paste0(dir_cleanl, "homebase_raw.rds"))[event_date < as.Date("2020-01-01")]

# dt_rawc <- copy(dt_raw)
# dt_raw <- copy(dt_rawc)

# Geographic crosswalk keyed by ziphb; rows with an empty key are dropped.
dt_geo <- readRDS(paste0(dir_clean, "cw_geo_improved.rds"))[ziphb != "", ]

# The crosswalk must hold exactly one row per ziphb: a duplicated key would
# duplicate raw rows when the crosswalk is merged in on ziphb. Abort rather
# than continue with a crosswalk that is known to be invalid.
if (anyDuplicated(dt_geo[["ziphb"]]) > 0) {
  stop(
    "ERROR: CHECK GEO CROSSWALK (ziphb is not unique in cw_geo_improved.rds)",
    call. = FALSE
  )
}

# Diagnostics taken before the merge. The row count is stored under a name
# that does not mask base::nrow().
print(paste0("Rows without original county FIPS: ", dt_raw[is.na(fips), .N]))
n_rows_before <- nrow(dt_raw)

# Rename selected variables
# Geography columns already present in the raw data get an "hb" suffix ...
hb_vars <- c("zip", "st", "st2", "stfips", "stssa", "msa", "msa2", "msac", "fips")
setnames(dt_raw, hb_vars, paste0(hb_vars, "hb"))

# ... and the crosswalk's columns get a "hud" suffix, so both versions can sit
# side by side after the merge.
setnames(dt_geo, "msa", "msac")
hud_vars <- c("zip", "st", "stfips", "msac", "fips")
setnames(dt_geo, hud_vars, paste0(hud_vars, "hud"))

# Attach the crosswalk columns to every raw row. all.y = TRUE keeps raw rows
# whose ziphb has no match in the crosswalk (their *hud columns become NA).
geo_cols <- c("ziphb", paste0(hud_vars, "hud"))
dt_raw <- merge(dt_geo[, geo_cols, with = FALSE], dt_raw, by = "ziphb", all.y = TRUE)

print(paste0("Rows without improved county FIPS: ", dt_raw[is.na(fipshud), .N]))

# The merge must neither add nor drop raw rows. Stop before anything is
# written so a faulty merge cannot produce an output file.
if (nrow(dt_raw) != n_rows_before) {
  stop(
    "ERROR: MERGE ISSUES (", n_rows_before, " rows before merge, ",
    nrow(dt_raw), " rows after)",
    call. = FALSE
  )
}
dt_raw[, ziphb := NULL]

# Export the merged data in Stata format.
write_dta(dt_raw, paste0(dir_clean, "homebase_raw_pre2020.dta"))

print(paste("Ended at", Sys.time()))
# End of R script
