# Log the wall-clock time at which the run starts.
print(paste("Started at", Sys.time()))
## [1] "Started at 2020-07-18 11:02:15"

# Version stamp that appears in the cleaned survey file names.
surv_ver <- "20200707"

# Survey "hours" file: read it, then keep only rows where every selection
# flag listed below equals 1.
dt_resp <- data.table(
  readRDS(paste0(dir_clean, "survey_worker/homebase_worker_survey_hours_", surv_ver, ".rds"))
)
dt_resp <- dt_resp[
  row_sel == 1 &
    user_base == 1 &
    user_nfirme == 1 &
    firm_base_2wk == 1 &
    st_sel_2wk == 1
]

# Survey "raw" file: read it, then keep only rows with row_sel == 1.
dt_surv <- data.table(
  readRDS(paste0(dir_clean, "survey_worker/homebase_worker_survey_raw_", surv_ver, ".rds"))
)
dt_surv <- dt_surv[row_sel == 1]
# Cross gender with the has-children answer into a single factor.
dt_resp[, user_sexch := interaction(user_gender, user_child)]
print(levels(dt_resp$user_sexch))
## [1] "Female.Yes" "Male.Yes" "Non-binary.Yes" "Female.No"
## [5] "Male.No" "Non-binary.No"

# Relabel the levels in place. The replacement is positional, so the labels
# must stay in the same order as the levels printed above.
sexch_labels <- c(
  "Female w/ Child", "Male w/ Child", "Non-binary w/ Child",
  "Female w/o Child", "Male w/o Child", "Non-binary w/o Child"
)
setattr(dt_resp$user_sexch, "levels", sexch_labels)
table(dt_resp$user_sexch)
##
## Female w/ Child Male w/ Child Non-binary w/ Child
## 27175 10831 7
## Female w/o Child Male w/o Child Non-binary w/o Child
## 45327 21557 2013

# Set both non-binary groups to missing, then discard the levels that are
# no longer used so they disappear from later tabulations.
sexch_excluded <- c("Non-binary w/ Child", "Non-binary w/o Child")
dt_resp[user_sexch %in% sexch_excluded, user_sexch := NA]
set(dt_resp, j = "user_sexch", value = droplevels(dt_resp$user_sexch))
table(dt_resp$user_sexch)
##
## Female w/ Child Male w/ Child Female w/o Child Male w/o Child
## 27175 10831 45327 21557
# dt_resp[,user_level:=level]
# Attach user_level from the per-user lookup file. all.x = TRUE makes this a
# left join: every row of dt_resp is kept even without a match.
dt_dms <- readRDS(paste0(dir_clean, "survey_worker/worker_userid_var_sel.rds"))
dt_resp <- merge(
  dt_resp,
  dt_dms[, c("userid", "user_level")],
  by = "userid",
  all.x = TRUE
)

# Day of the week taken from POSIXlt's wday field.
dt_resp[, wkd := as.POSIXlt(date)$wday]

# Collapse to one row per distinct combination of the user-level columns
# (every column whose name contains "userid" or "user_").
lvar <- grep("userid|user_", names(dt_resp), value = TRUE) #& !grepl("user_race_",lvar)
dt_demogr <- unique(dt_resp[, .SD, .SDcols = lvar])
dt_demogr[, nobs := 1]

# Move the main demographic columns to the front, in reporting order.
setcolorder(
  dt_demogr,
  c(
    "userid", "user_surv", "user_base",
    "user_gender", "user_age", "user_race", "user_race2", "user_marital",
    "user_child", "user_child18", "user_educ", "user_income", "user_wage",
    "user_level", "user_nfirm", "user_sexch", "nobs"
  )
)

# Descriptive table of the remaining columns; missing values are not shown
# and user_child is forced to be summarised as categorical.
tbl_summary(
  dt_demogr[, !c("userid", "user_surv", "user_base", "user_race2", "user_sexch", "user_nfirm")],
  missing = "no",
  type = list("user_child" ~ "categorical")
)
| Characteristic | N = 1688¹ |
|---|---|
| user_gender | |
| Female | 1127 (67%) |
| Male | 514 (31%) |
| Non-binary | 40 (2.4%) |
| user_age | |
| 18-25 | 611 (36%) |
| 26-37 | 459 (27%) |
| 38-49 | 273 (16%) |
| 50-64 | 259 (15%) |
| 65 or above | 77 (4.6%) |
| user_race | |
| White | 1124 (67%) |
| Black | 152 (9.1%) |
| Hispanic | 271 (16%) |
| Asian | 96 (5.8%) |
| Native American | 15 (0.9%) |
| Pacific Islander | 9 (0.5%) |
| user_marital | |
| Single | 928 (55%) |
| Married | 445 (26%) |
| Living with partner | 169 (10%) |
| Separated | 25 (1.5%) |
| Divorced | 97 (5.8%) |
| Widowed | 20 (1.2%) |
| user_child | |
| Yes | 533 (32%) |
| No | 1152 (68%) |
| user_child18 | |
| 1 | 176 (49%) |
| 2 | 103 (29%) |
| 3 | 54 (15%) |
| 4 | 19 (5.3%) |
| More than 4 | 5 (1.4%) |
| user_educ | |
| Some high school | 116 (6.9%) |
| High school graduate | 483 (29%) |
| Two-year degree/some college | 569 (34%) |
| Bachelor's degree | 396 (24%) |
| Master's degree or more | 119 (7.1%) |
| user_income | |
| Less than $15,000 | 352 (21%) |
| $15,000-$24,999 | 363 (22%) |
| $25,000-$34,999 | 249 (15%) |
| $35,000-$44,999 | 144 (8.7%) |
| $45,000-$54,999 | 118 (7.1%) |
| $55,000-$64,999 | 102 (6.2%) |
| $65,000-$74,999 | 76 (4.6%) |
| $75,000-$84,999 | 45 (2.7%) |
| More than $85,000 | 205 (12%) |
| user_wage | |
| $5-$7.49 | 118 (7.0%) |
| $7.50-$9.99 | 252 (15%) |
| $10-$12.49 | 449 (27%) |
| $12.50-$14.99 | 350 (21%) |
| $15-$17.49 | 265 (16%) |
| $17.50-$19.99 | 81 (4.8%) |
| $20 -$22.49 | 68 (4.0%) |
| $22.50-$24.99 | 30 (1.8%) |
| $25 or higher | 69 (4.1%) |
| user_level | |
| Employee | 1589 (94%) |
| Manager | 78 (4.6%) |
| General Manager | 21 (1.2%) |
| nobs | 1688 (100%) |
| user_race_h | 273 (100%) |
| user_race_i | 20 (100%) |
| user_race_a | 99 (100%) |
| user_race_p | 10 (100%) |
| user_race_b | 152 (100%) |
| user_race_w | 1134 (100%) |
| user_nfirme | 1688 (100%) |
¹ Statistics presented: n (%)
# Lookup table from a variable suffix (`var`, the part of the column name
# after "user_") to the section header (`vars`) printed above that
# variable's rows in the demographic summary table.
dt_info <- data.table(
  var = c("gender", "age", "marital", "child", "child18", "race", "income", "wage", "educ", "level"),
  vars = c("Gender", "Age", "Marital Status", "Have Children", "Number of Children (if any)",
           "Race", "Household Income", "Wage in Jan 2020", "Education", "Job Title")
)
# Alternative Race and Ethnicity, and Manager
# The summary loop further down looks headers up by the suffixes it iterates
# over, and the position variable there is "pos" (column user_pos). Without a
# "pos" entry that lookup returns nothing and the Employee/Manager rows are
# emitted with no "Job Title" header. "manager" is kept so that any existing
# lookup by that key still resolves.
dt_info <- rbind(
  dt_info,
  data.table(
    var = c("racee", "ethn", "manager", "pos"),
    vars = c("Race", "Ethnicity", "Job Title", "Job Title")
  )
)
# --- Alternative race variable (user_racee) --------------------------------
# Categories are assigned in priority order. Every step after the first is
# guarded by is.na(user_racee), so a row keeps the first category it matches.
lrace <- factor(1:4, labels = c("White", "Black", "Asian or Pacific Islander",
                                "American Indian or Alaskan Native"))
# White only: the white flag is set and the b/i/a/p flags are all missing.
dt_demogr[user_race_w == 1 & is.na(user_race_b) & is.na(user_race_i) &
            is.na(user_race_a) & is.na(user_race_p),
          user_racee := lrace[1]]
dt_demogr[is.na(user_racee) & user_race_b == 1, user_racee := lrace[2]]
# The parentheses are required: `&` binds tighter than `|`, so without them
# the condition reads (is.na(user_racee) & a == 1) | p == 1 and overwrites
# rows that already received a category above whenever user_race_p == 1.
dt_demogr[is.na(user_racee) & (user_race_a == 1 | user_race_p == 1),
          user_racee := lrace[3]]
dt_demogr[is.na(user_racee) & user_race_i == 1, user_racee := lrace[4]]

# --- Ethnicity (user_ethn) -------------------------------------------------
# Hispanic when the h flag is set; Non-Hispanic when it is not but at least
# one of the other race flags is non-missing. Rows with no flag at all stay NA.
lethn <- factor(1:2, labels = c("Hispanic", "Non-Hispanic"))
dt_demogr[user_race_h == 1, user_ethn := lethn[1]]
dt_demogr[is.na(user_ethn) &
            (!is.na(user_race_w) | !is.na(user_race_b) | !is.na(user_race_i) |
               !is.na(user_race_a) | !is.na(user_race_p)),
          user_ethn := lethn[2]]

# --- Two-level position (user_pos) -----------------------------------------
# Collapses user_level: "Manager" and "General Manager" both map to "Manager".
lpos <- factor(1:2, labels = c("Employee", "Manager"))
dt_demogr[user_level %in% c("Employee"), user_pos := lpos[1]]
dt_demogr[user_level %in% c("Manager", "General Manager"), user_pos := lpos[2]]
# Build the demographic summary table: for each variable, one header row
# (label taken from dt_info) followed by one row per non-missing level with
# its share of respondents. Missing values are excluded from both the rows
# and the denominator.
lvar_tab <- c("gender", "age", "racee", "ethn", "marital", "child", "child18",
              "educ", "income", "wage", "pos")
# Sections are collected in a preallocated list and stacked once at the end
# instead of growing dt_tabr with rbind() on every iteration.
lsec <- vector("list", length(lvar_tab))
for (isec in seq_along(lvar_tab)) {
  vvar <- lvar_tab[isec]
  vvar_by <- paste0("user_", vvar)
  # Count respondents per level; ordering by the column keeps factor level order.
  dt_sum <- dt_demogr[, .(nobs = .N), by = c(vvar_by)]
  setorderv(dt_sum, vvar_by)
  setnames(dt_sum, vvar_by, "val")
  dt_sum <- dt_sum[!is.na(val)] # Remove NA from table
  dt_sum[, sobs := percent(nobs / sum(nobs), accuracy = 0.1)]
  dt_sum <- dt_sum[, lapply(.SD, as.character), .SDcols = c("val", "sobs")]
  dt_sum[, var := vvar]
  # Header row has only `val`; fill = TRUE leaves its sobs/var as NA.
  lsec[[isec]] <- rbind(data.table(val = dt_info[var == vvar, vars]), dt_sum, fill = TRUE)
}
dt_tabr <- rbindlist(lsec, fill = TRUE)

# Lay the long table out as two side-by-side panels: the first `ibreak` rows
# on the left, the rest on the right, aligned on a running row number.
# NOTE(review): 34 looks tuned to the current number of rows per section -
# re-check it whenever the set of variables or observed levels changes.
ibreak <- 34
# head()/tail() return fewer (or zero) rows when dt_tabr is shorter than
# ibreak, instead of indexing past the end of the table.
dt_left <- head(dt_tabr[, c("val", "sobs")], ibreak)[, row := .I]
dt_right <- tail(dt_tabr[, c("val", "sobs")], -ibreak)[, row := .I]
dt_tab <- merge(dt_left, dt_right, by = "row", all = TRUE)
fwrite(dt_tab, paste0(dir_otab, "P2_surv_demogr_sum.csv"))

# Log the wall-clock time at which the run ends.
print(paste("Ended at", Sys.time()))
## [1] "Ended at 2020-07-18 11:02:27"
# End of R script