# cd /accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_data/
# sbatch --cpus-per-task=4 --mem=64g --export=ALL,script=ws_1_quest.R zb_r.sh

# Command-line arguments: `sinfo` holds the full invocation vector, `args`
# only the trailing (user-supplied) arguments.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
sinfo <- commandArgs(trailingOnly = FALSE)
args <- commandArgs(trailingOnly = TRUE)

print(args)

# Current user name: take USERNAME when it is non-empty, otherwise fall back
# to USER. Plain if/else is used since the condition is a single value.
sys_user <- Sys.getenv("USERNAME")
if (sys_user == "") {
  sys_user <- Sys.getenv("USER")
}
# SLURM cluster name; empty string when the variable is not set
sys_cluster <- Sys.getenv("SLURM_CLUSTER_NAME")
# 0 in an interactive session, 1 otherwise
sys_batch <- if (interactive()) 0 else 1

# Location of the shared function/configuration scripts, keyed on the user
# running the script.
if (sys_user=="homebase") { # For replication: Replace "homebase" with current username
  dir_func <- "/accounts/projects/jrothst/homebase/data/bpea_replication_archive/code_function/"
  # For replication: Replace with the path of the "code_function/" folder
}

# Without a matching user branch `dir_func` is never created and the source()
# call below would fail with an opaque "object not found" error; stop early
# with a message that says what to change instead.
if (!exists("dir_func")) {
  stop(
    "`dir_func` is not defined for user '", sys_user, "'. ",
    "Edit the user check at the top of this script and set `dir_func` ",
    "to the path of the \"code_function/\" folder.",
    call. = FALSE
  )
}

# NOTE(review): 0_directory.R presumably defines the dir_* paths used below
# (dir_proj, dir_projws, dir_clean) -- they are not defined in this script.
source(paste0(dir_func, "0_directory.R"))
print(paste("Started at", Sys.time()))

library(data.table)

# Record R and package versions in the job log
print(sessionInfo())

setwd(dir_proj)

#===============================================================================
# Wave 1

# Employee survey data, loaded for reference only (it is not used again in
# this script)
dt_surv <- fread(
  paste0(dir_projws,"Data/Data_raw/employeesurvey0526.csv"),
  encoding = "UTF-8"
)

# Questionnaire text: one row per line of the file, stored in column `qtext`
dt_qraw <- data.table(
  qtext = readLines(
    con = paste0(dir_clean,"survey_worker/survey_worker_questionnaire_wave_1.txt"),
    encoding = "UTF-8"
  )
)

#-------------------------------------------------------------------------------
# Extracting information

# Question identifier: "Q" followed by digits at the start of a line. The
# identifier is then carried forward to every following line until the next
# identifier appears (each non-missing identifier opens a new group).
dt_qraw[, var := stringr::str_extract(qtext, "^Q\\d+")]
dt_qraw[, var := var[1], by = .(cumsum(!is.na(var)))]

# Answer options: lines that start with "o<TAB>" or the U+25A2 box character.
# The leading marker is removed and the remainder kept as the label; all other
# lines keep a missing label.
dt_qraw[qtext %like% "^(o\\t|\u25A2)", lab := sub("^(o\\t|\u25A2)", "", qtext)]

# Numeric code: the first "(digits)" group in the label, parentheses removed
dt_qraw[, val := as.integer(gsub("[\\(\\)]", "", stringr::str_extract(lab, "\\(\\d+\\)")))]

# Strip a trailing "(digits)" code from the label text and tidy whitespace
dt_qraw[, lab := trimws(lab)]
dt_qraw[, lab := trimws(sub("\\(\\d+\\)$", "", lab))]

# Keep only the answer-option rows
dt_qout <- dt_qraw[!is.na(lab), .(var, val, lab)]

# Manual label overrides for three specific options
dt_qout[var == "Q13" & val == 10, lab := "Other"]
dt_qout[var == "Q28" & val == 7, lab := "Other (Please specify)"]
dt_qout[var == "Q24" & val == 11, lab := "None of the above"]

# Build the value-label rows for a set of grid questions that share one scale.
#   stems   : question identifiers, e.g. c("Q15", "Q16")
#   n_items : number of sub-items per question (variables <stem>_1 .. <stem>_n)
#   labels  : scale labels; their position (1, 2, ...) is used as the value
# Returns a data.table with columns var, val, lab, ordered by question,
# sub-item and value.
make_grid_labels <- function(stems, n_items, labels) {
  # seq_len(n_items) is recycled across the repeated stems:
  # Q15_1 .. Q15_n, Q16_1 .. Q16_n
  vars <- paste0(rep(stems, each = n_items), "_", seq_len(n_items))
  data.table(
    var = rep(vars, each = length(labels)),
    val = rep(seq_along(labels), times = length(vars)),
    lab = rep(labels, times = length(vars))
  )
}

# Hard-coded scales for the sub-item variables of Q15/Q16 and Q32/Q33,
# appended in a single rbind() rather than growing dt_qout inside a loop.
# NOTE(review): presumably these scales are not picked up by the option-line
# parsing above -- confirm against the questionnaire text.
dt_qout <- rbind(
  dt_qout,
  make_grid_labels(
    stems = c("Q15", "Q16"),
    n_items = 4L,
    labels = c("Strongly agree","Somewhat agree","Neither agree nor disagree","Somewhat disagree","Strongly disagree")
  ),
  make_grid_labels(
    stems = c("Q32", "Q33"),
    n_items = 3L,
    labels = c("Not at all","Slightly","Moderately","Considerably","A great deal")
  )
)

# Sort by the numeric part of the question identifier (so the order is numeric
# rather than alphabetical), then by variable name and value. The helper
# column is dropped again once the rows are in place.
dt_qout[, var_num := as.integer(sub("Q(\\d+)($|_.*?$)", "\\1", var))]
setorder(dt_qout, var_num, var, val)
dt_qout[, var_num := NULL]

# Write the var/val/lab lookup as both RDS and CSV
saveRDS(
  object = dt_qout,
  file = paste0(dir_clean,"survey_worker/survey_worker_var_val.rds")
)
fwrite(
  x = dt_qout,
  file = paste0(dir_clean,"survey_worker/survey_worker_var_val.csv")
)

print(paste("Ended at", Sys.time()))
# End of R script
