# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=2 --mem=128g --export=ALL,script=sg_1_visit_raw_update.R --partition=low zb_r.sh

# Capture the command line: `sinfo` is the full invocation, `args` holds only
# the trailing (user-supplied) arguments.
sinfo <- commandArgs(trailingOnly = FALSE)
args <- commandArgs(trailingOnly = TRUE)

print(args)

# Identify the current user: USERNAME is used when it is set, USER otherwise.
sys_user <- Sys.getenv("USERNAME")
if (sys_user == "") {
  sys_user <- Sys.getenv("USER")
}
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")
# 0 when run interactively, 1 when run as a batch job.
sys_batch <- if (interactive()) 0 else 1

if (sys_user == "homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

# Fail early with an actionable message instead of the bare
# "object 'dir_func' not found" raised by source() below.
if (!exists("dir_func")) {
  stop(
    "dir_func is not set for user '", sys_user, "'. ",
    "Edit the user check above and point dir_func to the 'code_function/' folder.",
    call. = FALSE
  )
}

# NOTE(review): dir_proj, dir_raw and dir_clean used later in this script are
# not defined in this file; presumably 0_directory.R sets them -- confirm.
source(paste0(dir_func, "0_directory.R"))
print(paste("Started at", Sys.time()))

library(data.table)

print(sessionInfo())

setwd(dir_proj)

#===============================================================================

# Root folder of the raw SafeGraph files.
dir_rawsg <- paste0(dir_raw, "safegraph/")

# Columns kept from every weekly file. Set var_long to 1 to additionally keep
# the place name, street address and brand fields.
var_long <- 0

lvar_short <- c(
  "safegraph_place_id", "city", "region", "postal_code",
  "date_range_start", "date_range_end",
  "raw_visit_counts", "raw_visitor_counts", "visits_by_day"
)
lvar_long <- c(
  "safegraph_place_id", "location_name", "street_address", "city", "region", "postal_code",
  "safegraph_brand_ids", "brands",
  "date_range_start", "date_range_end",
  "raw_visit_counts", "raw_visitor_counts", "visits_by_day"
)

lvar <- if (var_long == 1) lvar_long else lvar_short

#-------------------------------------------------------------------------------
# Update prior to 2020-06-24

# Files in this folder are selected by the YYYY-MM-DD date that prefixes
# their name.
dir_gz <- paste0(dir_rawsg, "weekly_patterns/v2/main-file/")

lzip <- list.files(dir_gz)

# Parse the leading date with an explicit format. Names without a valid date
# prefix are dropped; without this guard as.Date() either errors or yields NA,
# and an NA in the logical index would put an "NA" path into the file list.
has_date <- grepl("^[0-9]{4}-[0-9]{2}-[0-9]{2}", lzip)
zip_date <- as.Date(substr(lzip, 1L, 10L), format = "%Y-%m-%d")

# Keep only files dated after 2020-05-11. The earlier lower bound
# (>= 2019-12-30) is implied by this cutoff, so one filter is enough.
keep <- has_date & !is.na(zip_date) & zip_date > as.Date("2020-05-11")
lzip <- lzip[keep]

lzip <- paste0(dir_gz, lzip)

#-------------------------------------------------------------------------------
# Update since 2020-06-24

# Files here are found recursively. The pattern is escaped and anchored so that
# only names ending in ".csv.gz" are picked up.
dir_gz2 <- paste0(dir_rawsg, "weekly_patterns_delivery/weekly/patterns/")
lzip2 <- list.files(
  dir_gz2,
  full.names = TRUE,
  pattern = "\\.csv\\.gz$",
  recursive = TRUE,
  include.dirs = FALSE
)

#-------------------------------------------------------------------------------

# Full list of files to read: the main-file folder first, then the delivery folder.
lzip <- c(lzip, lzip2)

# Stop with a clear message when there is nothing to read. Looping over
# 1:length(lzip) on an empty list would instead try to read a file named "NA".
if (length(lzip) == 0L) {
  stop("No SafeGraph weekly pattern files found to append.", call. = FALSE)
}

# Read one weekly file, keep the columns listed in `lvar`, and convert the
# date-range bounds to Date using their leading YYYY-MM-DD part.
# postal_code is forced to character when reading.
read_weekly_patterns <- function(path) {
  print(path)
  dt_raw <- data.table::fread(path, colClasses = list("character" = "postal_code"))
  dt_sel <- dt_raw[, .SD, .SDcols = lvar]
  date_rx <- "^(\\d{4}-\\d{2}-\\d{2}).*?$"
  dt_sel[, date_range_start := as.Date(sub(date_rx, "\\1", date_range_start))]
  dt_sel[, date_range_end := as.Date(sub(date_rx, "\\1", date_range_end))]
  dt_sel
}

# Stack all files in one pass; growing the table with rbind() inside a loop
# recopies the accumulated rows on every iteration.
dt_out <- data.table::rbindlist(lapply(lzip, read_weekly_patterns))

# Split the bracketed, comma-separated visits_by_day string into seven columns
# (visit_wkd_1 ... visit_wkd_7), then convert each of them to integer.
wkd_cols <- paste0("visit_wkd_", 1:7)
visits_no_brackets <- gsub("\\[|\\]", "", dt_out$visits_by_day)
dt_out[, (wkd_cols) := tstrsplit(visits_no_brackets, ",", fixed = TRUE)]
dt_out[, (wkd_cols) := lapply(.SD, as.integer), .SDcols = wkd_cols]

# Append the newly read rows to the existing extract and write the result to a
# separate "_update" file; the existing extract itself is only read.
path_prev <- paste0(dir_clean, "safegraph/safegraph_raw_lite_2020.rds")
path_update <- paste0(dir_clean, "safegraph/safegraph_raw_lite_2020_update.rds")

dt_raw <- readRDS(path_prev)
dt_combined <- rbind(dt_raw, dt_out)
saveRDS(dt_combined, path_update)


print(paste("Ended at", Sys.time()))
# End of R script
