# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=6 --mem=36g --export=ALL,script=sg_1_core_poi_merge.R --partition=high zb_r.sh

# Capture the full command line and the trailing (user-supplied) arguments.
sinfo <- commandArgs(trailingOnly = FALSE)
args <- commandArgs(trailingOnly = TRUE)

print(args)

# Prefer the USERNAME environment variable when it is set, otherwise use USER.
sys_user <- if (Sys.getenv("USERNAME") != "") Sys.getenv("USERNAME") else Sys.getenv("USER")
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")
# 0 in an interactive session, 1 otherwise.
sys_batch <- if (interactive()) 0 else 1

if (sys_user == "homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

# Without this guard an unrecognised user only hits an opaque
# "object 'dir_func' not found" error on the source() call below.
if (!exists("dir_func")) {
  stop(
    "dir_func is not set for user '", sys_user, "'. ",
    "Edit the user check above and point dir_func at the \"code_function/\" folder.",
    call. = FALSE
  )
}

# Load the project directory definitions from 0_directory.R.
source(paste0(dir_func, "0_directory.R"))
print(paste("Started at", Sys.time()))

library(data.table)

print(sessionInfo())

setwd(paste0(dir_proj))

#===============================================================================

# Combine every "core_poi_raw_" file into one table keyed by
# (locid, zip, naics), with one indicator column "poi_ver_<digits>" per file
# that is 1 where the row appears in that file and NA otherwise.
lpoi <- list.files(paste0(dir_clean, "safegraph/"), pattern = "core_poi_raw_", full.names = TRUE)

# Fail early with a clear message rather than on a missing column later on.
if (length(lpoi) == 0) {
  stop("No core_poi_raw_ files found in ", paste0(dir_clean, "safegraph/"), call. = FALSE)
}

dt_poi <- data.table()

for (ipoi in lpoi) {
  print(ipoi)

  # Read the first run of six digits from the file name only, so that digits
  # appearing in a parent directory name cannot be mistaken for the version.
  iver <- stringr::str_extract(basename(ipoi), "\\d{6}")
  dti_poi <- readRDS(ipoi)
  # Restore data.table internals after deserialisation (also converts a plain
  # data.frame) so that the by-reference operations below are safe.
  setDT(dti_poi)
  setnames(dti_poi, c("safegraph_place_id", "postal_code", "naics_code"), c("locid", "zip", "naics"))

  # Keep only the three key columns.
  lvdrop <- setdiff(names(dti_poi), c("locid", "zip", "naics"))
  if (length(lvdrop) > 0) {
    dti_poi[, (lvdrop) := NULL]
  }
  dti_poi[, (paste0("poi_ver_", iver)) := 1]

  # Detect the first file by the accumulator having no columns yet. Testing
  # for zero rows would throw away earlier poi_ver_* columns whenever the
  # accumulated table happens to be empty.
  if (ncol(dt_poi) == 0) {
    dt_poi <- dti_poi
  } else {
    dt_poi <- merge(dt_poi, dti_poi, by = c("locid", "zip", "naics"), all = TRUE)
  }
}

# Collect every locid that occurs on more than one row of dt_poi and mark it
# with poi_dup = 1.
dt_dup <- dt_poi[, .N, by = "locid"][N != 1, .(locid)]
dt_dup[, poi_dup := 1]

print(paste0("# of Duplicated Locations: ", nrow(dt_dup)))

# Left join the flag back on; locids that are not duplicated get poi_dup = NA.
dt_poi <- merge(dt_poi, dt_dup, by = "locid", all.x = TRUE)

# Write the merged table next to the input files.
out_path <- paste0(dir_clean, "safegraph/safegraph_core_poi_raw.rds")
saveRDS(dt_poi, out_path)

print(paste("Ended at", Sys.time()))
# End of R script
