
# source(paste0(dir_proj,"1_do_bpea/bpea_0_f_reg.R"))
# dt_in: Data input
# reg_lhs: LHS variable
# reg_ord: "stay"|"reop"|"act"|"school"|"pua"|"fpuc" -- Order types: shutdown, reopen, active, school closure, two UI types
# reg_type: "dd","es","res" -- Basic DD, basic event study, restricted event study
# reg_stdate: Whether to include state-specific time trends for restricted event study

# Dummies: 
# ip_*: post order dummy
# ie_*: ever order dummy
# ie_*_nd_*: days since order dummy (ie_*_nd_n* means prior to order)

# # For testing
# 
# dt_in <- copy(dt_hours)
# reg_lhs <- "r_hours"
# reg_ord <- "stay"
# reg_type <- "dd"
# 
# reg_ord <- "act"
# reg_type <- "es"

#===============================================================================
# Function that returns a balanced panel based on regression and order type

f_drange <- function(dt_in,reg_ord,reg_type) {
  # Restrict dt_in to the date window implied by the order type, then drop any
  # state whose number of rows in that window differs from the maximum.
  # dt_in: data.table; this function reads columns st, date, ndaywk, dstay, dreop
  # reg_ord: "stay"|"school"|"reop"|"act"|"pua"|"fpuc" -- selects the date window
  # reg_type: currently unused in this function
  # Returns: the filtered data.table
  # Problems found by the sanity checks below are only printed, not raised.
  
  # Fail up front on an order type with no date window defined; otherwise
  # min_dsel/max_dsel would be undefined further down.
  lord_known <- c("stay","school","reop","act","pua","fpuc")
  if (length(reg_ord) != 1 || !reg_ord %in% lord_known) {
    stop(paste0("f_drange: no date range defined for reg_ord = '",
                paste(reg_ord,collapse=","),"'; expected one of: ",
                paste(lord_known,collapse=", ")),
         call. = FALSE)
  }
  
  min_drange <- as.Date("2020-02-16") # Min range
  max_dstay <- max(dt_in[,dstay],na.rm=TRUE) # Last shutdown
  min_dreop <- min(dt_in[,dreop],na.rm=TRUE) # First reopen
  max_fullwk <- max(dt_in[ndaywk==7,date]) # Last full week
  
  # Select balanced sample
  if (reg_ord %in% c("stay","school")) {
    # 2/16 - day before first reopening
    min_dsel <- min_drange
    max_dsel <- min_dreop - 1
  } else if (reg_ord %in% c("reop")) {
    # last shutting down - present (last full week)
    min_dsel <- max_dstay
    max_dsel <- max_fullwk
  } else {
    # "act","pua","fpuc": 2/16 - present (last full week)
    min_dsel <- min_drange
    max_dsel <- max_fullwk
  }
  
  print(paste0("Date Range: ",min_dsel," - ",max_dsel))
  
  dt_in <- dt_in[date>=min_dsel & date<=max_dsel,]
  
  # Check it is balanced: every state should have the same number of rows
  dt_tmp <- dt_in[,.(nday=.N),by=c("st")]
  if (any(dt_tmp$nday!=max(dt_tmp$nday))) {
    print("WARNING: NOT BALANCED")
    lst <- as.character(dt_tmp[nday!=max(nday),st])
    print(paste0("Remove Unbalanced States: ",paste(lst,collapse=" ")))
    dt_in <- dt_in[!st %in% lst,] ### This should probably be changed
  }
  # Check if date range in data matches selected date range
  if (min(dt_in$date)!=min_dsel || max(dt_in$date)!=max_dsel) {
    print("ERROR: DATE RANGE NOT MATCHED")
  }
  # Check if any date is missing (span in days vs. number of distinct dates)
  ndate <- max(dt_in$date) - min(dt_in$date) + 1
  if (ndate != length(unique(dt_in$date))) {
    print("ERROR: SOME DATE MISSING")
  }
  
  return(dt_in)
}

#===============================================================================
# Function that runs regressions

f_regst <- function(dt_in,dt_regu=data.table(),reg_lhs,reg_ord,reg_type,reg_stdate=FALSE,reg_drange=c(),save_dt="") {
  # Merge order dates into dt_in, build order / event-day dummies, and run an
  # lm() with state and date fixed effects. Returns the fitted lm object.
  # dt_in: Data input (data.table; this function reads columns st and date)
  # dt_regu: optional extra regulation data, merged by st when it has rows
  # reg_lhs: LHS variable
  # reg_ord: "stay"|"reop"|"act"|"school"|"pua"|"fpuc" -- Order types: shutdown, reopen, active, school closure, two UI types
  #          ("stay_end" is also recognised when dropping never-shutdown states;
  #           f_drange has no window for it, so it presumably needs reg_drange -- TODO confirm)
  # reg_type: "dd","es","res" -- Basic DD, basic event study, restricted event study
  # reg_stdate: Whether to include state-specific time trends for restricted event study
  # reg_drange: optional c(start, end) date range; when empty, f_drange() picks it
  # save_dt: if non-empty, path prefix where the regression data is saved as .rds and .dta
  # Reads the global dir_clean to locate the crosswalk files.
  
  # Fail fast: an unknown reg_type would otherwise only surface at the very
  # end as a missing formula, after all the merging and dummy generation.
  if (length(reg_type) != 1 || !reg_type %in% c("dd","es","res")) {
    stop(paste0("f_regst: reg_type must be one of 'dd', 'es', 'res'; got '",
                paste(reg_type,collapse=","),"'"),
         call. = FALSE)
  }
  # A single date would make the upper bound NA and silently empty the sample
  if (length(reg_drange)==1) {
    stop("f_regst: reg_drange must be empty or c(start, end)", call. = FALSE)
  }
  
  print(paste(rep("-",40),collapse=""))
  print(paste(c(reg_lhs,reg_ord,reg_type,reg_stdate)))
  
  # Load regulation and state info
  dt_stc <- readRDS(paste0(dir_clean, "date/cw_date_st_reg.rds"))
  dt_stc[is.na(dstay), dreop:=NA] # Remove dreop if dstay is NA
  dt_stc[, nd_stayreop:=dreop-dstay] # Days between orders
  dt_ui <- readRDS(paste0(dir_clean, "date/cw_date_ui.rds"))
  dt_school <- readRDS(paste0(dir_clean, "date/cw_date_school.rds"))
  
  # Merge regulation and state with main data
  dt_in <- merge(dt_in, dt_stc, by=c("st"), all.x=TRUE)
  dt_in <- merge(dt_in, dt_ui[,c("st","dpua","dfpuc")], by=c("st"), all.x=TRUE)
  dt_in <- merge(dt_in, dt_school[,c("st","dschool")], by=c("st"), all.x=TRUE)
  
  # Merge additional regulation info
  if (nrow(dt_regu)!=0) {
    dt_in <- merge(dt_in, dt_regu, by=c("st"), all.x=TRUE)
  }
  
  # Merge state FIPS code if necessary
  dt_st <- readRDS(paste0(dir_clean, "cw/cw_geo_nber_state.rds"))
  if (!"stfips" %in% names(dt_in)) {
    dt_in <- merge(dt_in, dt_st[,c("st","stfips")],by=c("st"),all.x=TRUE)
  }
  
  rm(dt_stc, dt_ui, dt_school, dt_st)
  
  # Subsetting by state
  dt_in <- dt_in[stfips<=56,] # Keep only actual states and DC
  
  # For reopen orders, remove states that never had a stay-at-home order
  if (reg_ord %in% c("reop","stay_end")) {
    print("Drop states with no stay-at-home order")
    print(sort(unique(dt_in[is.na(dstay),st])))
    dt_in <- dt_in[!is.na(dstay),]
  }
  
  #-------------------------------------------------------------------------------
  # Prepare for regressions:
  
  # Keep selected dates
  if (length(reg_drange)!=0) {
    print(paste("Manual Date Range:",paste(reg_drange,collapse=" - ")))
    dt_in <- dt_in[date>=as.Date(reg_drange[1]) & date<=as.Date(reg_drange[2])]
  } else {
    dt_in <- f_drange(dt_in,reg_ord,reg_type) 
  }
  dt_in[, datef:=factor(date)] # Date as factor
  
  #-------------------------------------
  # Generate order dummy, days since order dummy
  
  if (reg_ord %in% c("act")) {
    lvdumgen <- c("stay","reop")
  } else if (reg_ord %in% c("pua","fpuc")) {
    lvdumgen <- c("stay","reop",reg_ord)
  } else {
    lvdumgen <- c(reg_ord)
  }
  
  # c("stay","reop","pua","fpuc","school")
  for (iorder in lvdumgen) {
    vodate <- paste0("d",iorder) # Order date
    vodume <- paste0("ie_",iorder) # Dummy for order ever
    vodump <- paste0("ip_",iorder) # Dummy for post order
    vonday <- paste0("nd_",iorder) # Days since order
    
    dt_in[, (vodume):=ifelse(is.na(get(vodate)), 0, 1)] # Dummy for ever order
    dt_in[, (vodump):=ifelse(date>=get(vodate), 1, 0)] # Dummy for post order
    dt_in[is.na(get(vodate)), (vodump):=0] # Dummy for post order (when order date is NA)
    dt_in[, (vonday):=date-get(vodate)] # Days since order
    
    # With no state ever under this order there are no event days at all;
    # min()/max() would return Inf/-Inf and the sequence below would fail.
    nd_all <- dt_in[[vonday]]
    if (all(is.na(nd_all))) {
      print(paste0("No order date found for: ",iorder))
      next
    }
    
    # Create dummies for ever order * days since order.
    # Columns are created in ascending event-day order; that order is what
    # later determines the order of the regressors in the formula.
    for (id in c(min(nd_all,na.rm=TRUE):max(nd_all,na.rm=TRUE))) {
      # "_nd_n<k>" = k days prior to order date, "_nd_<k>" = k days on/after
      vdumd <- paste0(vodume, ifelse(id < 0, "_nd_n", "_nd_"), abs(id))
      dt_in[, (vdumd):=0]
      dt_in[get(vodume)==1 & get(vonday)==id, (vdumd):=1]
    }
    
  }
  
  #-------------------------------------------------------------------------------
  # Linear Models
  
  #-------------------------------------
  # Basic DD
  
  if (reg_type=="dd") {
    
    vodum <- paste0("ip_",reg_ord) # Post order dummy
    if (reg_ord=="act") {
      # Active order dummy: after shutdown and not yet reopened
      vodum <- c("i_act")
      dt_in[, i_act:=ip_stay]
      dt_in[ip_reop==1, i_act:=0]
    }
    
    # Formula for lm
    lm_f <- paste0(reg_lhs, " ~ ", paste(c(vodum,"st","datef"),collapse = "+"))
  }
  
  #-------------------------------------
  # Basic Event Study | Restricted Event Study
  
  if (reg_type %in% c("es","res")) {
    
    # Select event day dummy (removing one as baseline)
    ldum <- names(dt_in)
    if (reg_ord %in% c("act")) {
      ldumreg <- ldum[grepl(paste0("ie_(stay|reop)_nd_"), ldum)]
    } else {
      ldumreg <- ldum[grepl(paste0("ie_",reg_ord,"_nd_"), ldum)]
    }
    if (reg_ord %in% c("pua","fpuc")) {
      dt_in[, i_act:=ip_stay]
      dt_in[ip_reop==1, i_act:=0]
      ldumreg <- c(ldumreg,"i_act") # Also include a dummy whether shutdown order is in effect
    }
    
    # Remove dummies for basic event study
    if (reg_type=="es") {
      
      # Baseline event days per order type, recall that ie_* means ever treated.
      # nd[1]: pre-order day always removed
      # nd[2]: second pre-order day removed only when there are no untreated states
      les_base <- list(
        stay   = list(label="Shutdown", nd=c(14,28)),
        reop   = list(label="Reopen",   nd=c(7,14)),
        school = list(label="Shutdown", nd=c(14,28)),
        pua    = list(label="UI",       nd=c(1,8)),
        fpuc   = list(label="UI",       nd=c(1,8))
      )
      # "act" uses both the shutdown and the reopen event days
      if (reg_ord=="act") {
        lord_es <- c("stay","reop")
      } else {
        lord_es <- reg_ord
      }
      # Order types without an entry above keep all of their event-day dummies
      for (iord in lord_es[lord_es %in% names(les_base)]) {
        ibase <- les_base[[iord]]
        ldrop <- paste0("ie_",iord,"_nd_n",ibase$nd)
        i_untreat <- any(dt_in[[paste0("ie_",iord)]]==0)
        if (i_untreat) {
          print(paste0(ibase$label,": Not all treated"))
          ldrop <- ldrop[1]
        } else {
          print(paste0(ibase$label,": All treated"))
        }
        ldumreg <- ldumreg[! ldumreg %in% ldrop]
      }
      
    } # End of if (reg_type=="es")
    
    # Remove dummies for restricted event study
    # Remove pre-treatment event time indicators for days more than 7 days prior to treatment
    if (reg_type=="res") {
      i_pre <- grepl("_n\\d+$",ldumreg) # Pre-order dummies end in "_n<days>"
      nd_pre <- rep(0L,length(ldumreg))
      nd_pre[i_pre] <- as.integer(sub("^.*?_n(\\d+)$","\\1",ldumreg[i_pre]))
      ldumreg <- ldumreg[!(i_pre & nd_pre>7)]
    }
    
    # Add state-specific time trend for restricted event study
    if (reg_type=="res" && reg_stdate==TRUE) {
      dt_in[,day20:=as.integer(date-as.Date("2020-01-01"))+1]
      lst_all <- unique(as.character(dt_in$st))
      for (ist in lst_all) {
        dt_in[, (paste0("stdate_",ist)):=0]
        dt_in[st==ist, (paste0("stdate_",ist)):=day20]
      }
      # Use exactly the trend columns created above, so that an unrelated
      # column whose name merely contains "stdate_" is never picked up.
      lstdate <- paste0("stdate_",lst_all)
      lstdate <- lstdate[lstdate!="stdate_CA"] # CA as baseline
      ldumreg <- c(ldumreg, lstdate)
    }
    
    # Formula for lm
    lm_f <- paste0(reg_lhs, " ~ ", paste(c(ldumreg,"st","datef"),collapse = "+"))
  }
  
  if (save_dt!="") {
    saveRDS(dt_in, paste0(save_dt,".rds"))
    # The .dta export gets "original.hits" renamed to "original_hits".
    # Rename on a copy: setnames() works by reference, and renaming dt_in
    # itself would break the regression below when reg_lhs is "original.hits".
    dt_dta <- dt_in
    if ("original.hits" %in% names(dt_in)) {
      dt_dta <- copy(dt_in)
      setnames(dt_dta, "original.hits", "original_hits")
    }
    haven::write_dta(dt_dta, paste0(save_dt,".dta"))
    rm(dt_dta)
  }
  
  print(lm_f)
  lm_out <- lm(formula = as.formula(lm_f), data=dt_in)
  
  # Print out coefficients not estimated (lm reports them as NA):
  lcoef <- lm_out$coefficients
  dt_out <- data.table(var=names(lcoef),coef=lcoef)
  if (nrow(dt_out[is.na(coef),]) != 0) {
    print(paste0("OMMITTED: ",paste(dt_out[is.na(coef),var], collapse = " ")))
  }
  print("")
  
  return(lm_out)
}

# End of R script