# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=6 --mem=36g --export=ALL,script=sg_1_core_poi.R --partition=high zb_r.sh

# Capture how the script was invoked: `sinfo` is the full command line,
# `args` only the trailing (user-supplied) arguments.
sinfo <- commandArgs(trailingOnly = FALSE)
args <- commandArgs(trailingOnly = TRUE)

print(args)

# Current user: take USERNAME when it is non-empty, otherwise fall back to USER.
sys_user <- if (nzchar(Sys.getenv("USERNAME"))) {
  Sys.getenv("USERNAME")
} else {
  Sys.getenv("USER")
}

# SLURM cluster name as reported by the environment ("" when the variable is unset).
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")

# Batch flag: 0 for an interactive session, 1 otherwise.
sys_batch <- if (interactive()) 0 else 1

# Locate the "code_function/" folder that holds the shared helper scripts.
# The path is only assigned for the username listed below.
if (sys_user == "homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

# Fail early with an actionable message when no path was configured for this
# user, instead of a later "object 'dir_func' not found" error. A dir_func
# that already exists in the session is accepted as-is.
if (!exists("dir_func")) {
  stop(
    "dir_func is not set for user '", sys_user, "'. ",
    "Edit the username check and the \"code_function/\" path in this script ",
    "before running it.",
    call. = FALSE
  )
}

# Run the shared directory-definition script from the "code_function/" folder.
# NOTE(review): 0_directory.R is presumably where dir_proj, dir_raw and
# dir_clean (used further down in this script) are defined -- confirm against
# that file.
source(paste0(dir_func, "0_directory.R"))
# Print a start timestamp.
print(paste("Started at", Sys.time()))

library(data.table)

# Print the R version and the loaded/attached packages for reproducibility.
print(sessionInfo())

# Make dir_proj the working directory for the rest of the script.
setwd(paste0(dir_proj))

#===============================================================================

# Unzip every SafeGraph Core POI archive and convert its CSV parts into one
# .rds file per release tag.
#
# Input : all files found recursively under <dir_raw>safegraph/core/ whose
#         path contains "YYYY-MM-DD.zip".
# Output: extracted files in <dir_raw>safegraph/core_poi/<YYYYMM>/ and
#         <dir_clean>safegraph/safegraph_core_poi_raw_<YYYYMM>.rds

lzip <- list.files(
  paste0(dir_raw, "safegraph/core/"),
  recursive = TRUE,
  full.names = TRUE
)

for (izip in lzip) {
  print(izip)

  # Release tag: "YYYY-MM-DD.zip" -> "YYYY-MM" -> "YYYYMM".
  iver <- stringr::str_extract(izip, "\\d{4}-\\d{2}-\\d{2}\\.zip")
  if (is.na(iver)) {
    # Without a dated name there is no release tag; carrying on would put the
    # output in an "NA" folder and an "..._NA.rds" file.
    warning(
      "Skipping file without a YYYY-MM-DD.zip name: ", izip,
      call. = FALSE
    )
    next
  }
  iver <- sub("-", "", substring(iver, 1, 7))

  dir_csv <- paste0(dir_raw, "safegraph/core_poi/", iver)
  if (!dir.exists(dir_csv)) {
    # recursive = TRUE so that a missing parent folder is created as well.
    dir.create(dir_csv, showWarnings = FALSE, recursive = TRUE)
  }
  unzip(zipfile = izip, overwrite = TRUE, exdir = dir_csv, setTimes = TRUE)

  #-----------------------------------------------------------------------------
  # Read csv and save as rds

  lcsv <- list.files(dir_csv, full.names = TRUE)
  lcsv <- lcsv[grepl("core_poi-part", lcsv)]

  # Stop with a clear message when the archive contained no CSV parts; the
  # old 1:length(lcsv) loop would instead have tried to read a file "NA".
  if (length(lcsv) == 0L) {
    stop(
      "No core_poi-part files found in ", dir_csv,
      " after unzipping ", izip,
      call. = FALSE
    )
  }

  # Read every part first and bind once, rather than growing the table with
  # rbind() on each iteration. postal_code is forced to character.
  lraw <- lapply(lcsv, function(icsv) {
    print(icsv)
    data.table::fread(icsv, colClasses = list("character" = "postal_code"))
  })
  # use.names = TRUE binds columns by name, as rbind() does for data.tables.
  dt_out <- data.table::rbindlist(lraw, use.names = TRUE)
  rm(lraw)

  saveRDS(
    dt_out,
    paste0(dir_clean, "safegraph/safegraph_core_poi_raw_", iver, ".rds")
  )
}

print(paste("Ended at", Sys.time()))
# End of R script
