# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=4 --mem=64g --export=ALL,script=sg_1_visit_sel_loc.R zb_r.sh

# Command-line information for this run: `sinfo` is the full R invocation,
# `args` holds only the user-supplied trailing arguments.
sinfo <- commandArgs(trailingOnly = FALSE)
args <- commandArgs(trailingOnly = TRUE)

print(args)

# Current user: USERNAME if it is set and non-empty, otherwise USER.
sys_user <- Sys.getenv("USERNAME")
if (!nzchar(sys_user)) {
  sys_user <- Sys.getenv("USER")
}

# SLURM cluster name; Sys.getenv() returns "" when the variable is unset.
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")

# 1 when R is running non-interactively, 0 in an interactive session.
sys_batch <- if (interactive()) 0 else 1

# Locate the folder of shared helper scripts for the current user.
if (sys_user == "homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

# Fail fast with an actionable message instead of the opaque
# "object 'dir_func' not found" that source() would otherwise raise.
if (!exists("dir_func")) {
  stop("dir_func is not set for user '", sys_user, "': edit the username and ",
       "the code_function/ path near the top of this script.", call. = FALSE)
}

# Load the project directory definitions, then log the start time.
source(paste0(dir_func, "0_directory.R"))
print(paste("Started at", Sys.time()))

library(data.table)

# Record the R version and attached packages in the job log.
print(sessionInfo())

# Work from the project root.
# NOTE(review): dir_proj is not defined in this script; presumably it is set
# by 0_directory.R sourced above -- confirm.
setwd(paste0(dir_proj))

#===============================================================================

# Folder holding the SafeGraph files read and written below.
# NOTE(review): dir_clean is likewise assumed to come from 0_directory.R and
# to end with a path separator, since it is concatenated without one.
dir_cleans <- paste0(dir_clean,"safegraph/")

# Load the helper script; presumably it defines f_wk(), which is called
# further down to map dates to weeks -- confirm.
source(paste0(dir_func,"f_wk.R"))

#===============================================================================

# Load the raw SafeGraph extract.
dt_raw <- readRDS(paste0(dir_cleans, "safegraph_raw_lite_2020.rds"))

# Visit-count columns are the ones whose name contains "visit_wkd_".
lvvist <- grep("visit_wkd_", names(dt_raw), value = TRUE)

# Identifier columns: original names and the short names used downstream.
id_old <- c("safegraph_place_id", "region", "postal_code",
            "date_range_start", "date_range_end")
id_new <- c("locid", "st_str", "zip", "date_start", "date_end")

# Keep identifiers plus visit columns, then rename everything in one pass:
# identifiers get their short names, visit columns lose the "visit_wkd_" text.
dt_sel <- dt_raw[, c(id_old, lvvist), with = FALSE]
setnames(dt_sel,
         old = c(id_old, lvvist),
         new = c(id_new, gsub("visit_wkd_", "", lvvist)))

# Drop the raw table and the temporary name vectors.
rm(dt_raw, id_old, id_new)

#-------------------------------------------------------------------------------
# Reshape to long: every non-identifier column becomes one row per record,
# with the source column stored in `wkd` and its value in `visit`.
id_cols <- c("locid", "st_str", "zip", "date_start", "date_end")
dt_sel <- melt(dt_sel,
               id.vars = id_cols,
               measure.vars = setdiff(names(dt_sel), id_cols),
               variable.name = "wkd",
               value.name = "visit")

# `wkd` comes out of melt() as a factor; its integer code is the position of
# the source column, so the first visit column maps to offset 0.
# NOTE(review): this relies on the visit columns being stored in day order --
# confirm against the raw file.
dt_sel[, wkd := as.integer(wkd) - 1]

# Calendar date of each observation: start of the record plus the offset.
# NOTE(review): assumes date_start is a Date-like type where +1 means one day.
dt_sel[, date := date_start + wkd]

# Sanity check: the number printed is the count of rows with date > date_end.
n_past_end <- dt_sel[date > date_end, .N]
print(paste0("date <= date_end: ", n_past_end))

dt_sel[, week := f_wk(date)]

# saveRDS(dt_sel[,c("locid","st_str","zip","date","week","wkd","visit")], 
#         paste0(dir_cleans, "safegraph_raw_lite_2020_slong.rds"))

#-------------------------------------------------------------------------------
# Total visits per location during the reference period

# Weeks that make up the reference period, as returned by f_wk() for these
# two dates.
ref_weeks <- f_wk(c("2020-01-19", "2020-02-01"))

# Weekly visit totals per location, restricted to the reference weeks.
# Filtering before aggregating yields the same table as aggregating every
# week and subsetting afterwards, but avoids summing weeks that are discarded.
dt_self <- dt_sel[week %in% ref_weeks,
                  lapply(.SD, sum, na.rm = TRUE),
                  by = c("locid", "week"), .SDcols = c("visit")]

# Diagnostics: location-weeks with a missing or non-positive total.
print(nrow(dt_self[is.na(visit)]))
print(nrow(dt_self[visit <= 0]))

# Collapse to one row per location: total visits over the reference weeks.
dt_out <- dt_self[, lapply(.SD, sum, na.rm = TRUE), by = c("locid"), .SDcols = c("visit")]

# Diagnostic: locations whose reference-period total is not positive.
# NOTE(review): these locations are only counted here, not removed, so the
# saved file still contains them; presumably the filter on visit > 0 is
# applied by the consumer of safegraph_sel_loc.rds -- confirm.
print(nrow(dt_out[visit <= 0]))

saveRDS(dt_out, paste0(dir_cleans, "safegraph_sel_loc.rds"))

print(paste("Ended at", Sys.time()))
# End of R script
