3 Descriptives

Load the required packages.

We then load the previously cleaned data table.

data_path <- here("Data", "cleaned_data.rds")
if (file.exists(data_path)) {
  d <- read_rds(file = data_path)
} else {
  stop(str_glue("{data_path} doesn't exist, run `01-clean.Rmd` to create it."))
}

# Make wave a nicely labelled factor
d <- d %>%
  mutate(Wave = factor(wid, levels = 1:3, labels = paste0("Wave ", 1:3)))

# Rename game to fit titles in plots
d <- d %>% 
  mutate(Game = if_else(Game == "Gran Turismo Sport", "GT Sport", Game))

3.1 Demographics after exclusions

d %>%
  filter(wid == 1) %>%
  distinct(pid, Game, Age, Experience, Gender) %>%
  select(-pid) %>%
  tbl_summary(by = Game, missing_text = "Missing") %>%
  add_overall() %>%
  bold_labels() %>%
  italicize_levels() %>%
  as_kable_extra(caption = "Sample demographics") %>% 
  kable_styling(full_width = FALSE, font_size = 12)
Table 3.1: Sample demographics
Characteristic Overall, N = 38,935 AC:NH, N = 13,646 Apex Legends, N = 1,158 EVE Online, N = 905 Forza Horizon 4, N = 1,981 GT Sport, N = 19,258 Outriders, N = 1,530 The Crew 2, N = 457
Age 34 (25, 42) 32 (25, 41) 25 (20, 32) 39 (31, 50) 33 (24, 42) 35 (25, 43) 38 (32, 45) 25 (20, 35)
Missing 82 29 5 3 6 37 2 0
Gender
Man 29,765 (77%) 5,451 (40%) 1,002 (87%) 853 (95%) 1,884 (95%) 18,745 (98%) 1,400 (92%) 430 (94%)
Non-binary / third gender 705 (1.8%) 557 (4.1%) 25 (2.2%) 7 (0.8%) 11 (0.6%) 87 (0.5%) 15 (1.0%) 3 (0.7%)
Prefer not to say 363 (0.9%) 183 (1.3%) 17 (1.5%) 10 (1.1%) 12 (0.6%) 119 (0.6%) 16 (1.0%) 6 (1.3%)
Woman 8,017 (21%) 7,426 (55%) 109 (9.5%) 32 (3.5%) 68 (3.4%) 267 (1.4%) 97 (6.3%) 18 (3.9%)
Missing 85 29 5 3 6 40 2 0
Experience 23 (16, 30) 22 (15, 30) 17 (10, 25) 25 (20, 32) 23 (15, 30) 25 (16, 30) 30 (22, 35) 17 (11, 25)
Missing 181 62 8 5 9 91 5 1
1 Median (IQR); n (%)

3.2 Survey descriptives

These are for people after exclusions.

3.2.1 Response rate & retention

# Get data on invite dates and Ns
invites <- read_csv(here("Data", "invites.csv")) %>%
  rename(Game = game) %>%
  mutate(
    Game = str_replace(Game, "Animal Crossing: New Horizons", "AC:NH"),
    Game = str_replace(Game, "Gran Turismo Sport", "GT Sport")
    )
# Create a table where wave 0 are number of invites,
# then calculate response rate / retention at each wave.
# This assumes there are no new participants at wave 3
# (people who didn't participate in wave 2 showed up at wave 3).
tmp <- bind_rows(
  select(invites, -date),
  d %>% filter(Responded) %>% count(Game, wid)
)
tmp_total <- tmp %>% 
  group_by(wid) %>% 
  summarise(n = sum(n)) %>% 
  mutate(Game = "Total")
tmp <- bind_rows(tmp, tmp_total) %>% 
  arrange(Game, wid) %>%
  group_by(Game) %>%
  mutate(
    R_rate = percent(n / lag(n), .1),
    n = comma(n)
  ) %>%
  pivot_wider(names_from = wid, values_from = c(n, R_rate)) %>%
  mutate(
    Invites = n_0,
    `Wave 1` = str_glue("{n_1} ({R_rate_1})"),
    `Wave 2` = str_glue("{n_2} ({R_rate_2})"),
    `Wave 3` = str_glue("{n_3} ({R_rate_3})")
  ) %>%
  select(Game, Invites:`Wave 3`) %>%
  mutate(across(everything(), ~ str_replace(., "NA", "0"))) %>% 
  ungroup()
tmp %>%   
  kbl(caption = "Number of people (response/retention rate) participating at each wave.") %>% 
  kable_styling(full_width = FALSE, font_size = 12)
Table 3.2: Number of people (response/retention rate) participating at each wave.
Game Invites Wave 1 Wave 2 Wave 3
AC:NH 640,000 13,536 (2.1%) 5,049 (37.3%) 4,084 (80.9%)
Apex Legends 900,000 1,128 (0.1%) 406 (36.0%) 228 (56.2%)
EVE Online 30,000 899 (3.0%) 240 (26.7%) 221 (92.1%)
Forza Horizon 4 834,515 1,959 (0.2%) 772 (39.4%) 597 (77.3%)
GT Sport 1,729,677 19,073 (1.1%) 7,699 (40.4%) 5,512 (71.6%)
Outriders 90,000.0 1,525.0 (1.7%) 379.0 (24.9%) 370.0 (97.6%)
The Crew 2 1,013,000 457 (0.0%) 97 (21.2%) 85 (87.6%)
Total 5,237,192 38,577 (0.7%) 14,642 (38.0%) 11,097 (75.8%)
# Also save this table for MS
tmp %>% 
  flextable() %>% 
  save_as_docx(path = "Figures/Retention-rates.docx")

3.2.2 Missingness

Here we report on the % missing for each variable

## Installing naniar [0.6.1] ...
##  OK [linked cache]
library(naniar)
d %>% 
  select(Game, pid, Wave, Affect, `Life satisfaction`, Hours) %>% 
  complete(nesting(Game, pid), Wave) %>% 
  group_by(Game, Wave) %>% 
  select(-pid) %>% 
  miss_var_summary() %>% 
  select(-n_miss) %>% 
  mutate(pct_miss = percent(pct_miss/100, .1)) %>% 
  pivot_wider(
    names_from = c(Wave, variable), 
    values_from = pct_miss, 
    names_glue = "{variable} ({Wave})"
    ) %>% 
  kbl(caption = "Percent missing at each wave for key variables") %>% 
  kable_styling(full_width = FALSE, font_size = 12)
Table 3.3: Percent missing at each wave for key variables
Game Affect (Wave 1) Life satisfaction (Wave 1) Hours (Wave 1) Affect (Wave 2) Life satisfaction (Wave 2) Hours (Wave 2) Affect (Wave 3) Life satisfaction (Wave 3) Hours (Wave 3)
AC:NH 3.5% 1.9% 0.2% 63.8% 63.2% 0.1% 70.6% 70.3% 0.2%
Apex Legends 8.7% 4.9% 0.0% 66.8% 65.6% 0.0% 81.1% 80.6% 0.1%
EVE Online 5.1% 2.2% 1.7% 74.6% 73.7% 0.9% 75.9% 75.7% 1.2%
Forza Horizon 4 4.5% 2.2% 0.2% 62.0% 61.5% 0.1% 70.4% 69.9% 0.1%
GT Sport 4.5% 2.2% 0.0% 60.9% 60.3% 0.0% 71.9% 71.6% 0.1%
Outriders 2.8% 1.7% 0.1% 75.8% 75.4% 0.0% 76.5% 75.9% 0.0%
The Crew 2 4.4% 2.2% 0.0% 79.9% 79.4% 0.0% 81.4% 81.4% 0.0%

3.2.3 Differences

We then looked at differences between people who dropped out vs who did not.

tmp <- d %>% 
  replace_na(list(Responded = FALSE)) %>%
  group_by(Game, pid) %>% 
  summarise(
    across(
      c(Affect, `Life satisfaction`, 
        Intrinsic, Extrinsic, 
        Age, Experience, Hours),
      mean, na.rm = TRUE
    ),
    dropped = factor(
      sum(Responded) != 3, 
      levels = c(FALSE, TRUE), 
      labels = c("No", "Yes")
    )
  )
tmp <- ungroup(tmp)
tmp <- tmp %>% 
  pivot_longer(Affect:Hours)
tmp %>%   
  ggplot(aes(dropped, value)) +
  stat_summary() +
  stat_summary(fun = mean, geom = "line", group = 1) +
  facet_grid(name~Game, scales = "free_y", margins = "Game")
out <- tmp %>% 
  group_by(name) %>% 
  summarise(
    fit = list(
      lmer(value ~ dropped + (1 + dropped | Game), data = cur_data())
    )
  )
out %>% 
  mutate(
    out = map(fit, ~tidy(.x, "fixed"))
  ) %>% 
  select(-fit) %>% 
  unnest(out) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(across(where(is.numeric), ~number(., .01))) %>% 
  transmute(
    Variable = name,
    Difference = str_glue("{estimate} ({std.error})")
  ) %>% 
  flextable(cwidth = 2) %>% 
  save_as_docx(path = "Figures/Between-dropout-table.docx")

3.2.4 Response dates

Only for actual responses (not rows where survey date was filled to be able to join telemetry)

d %>%
  # Take only actually responded-to waves
  filter(Responded) %>%
  mutate(Date = as_date(StartDate)) %>%
  count(Game, Wave, Date) %>%
  ggplot(
    aes(Date, n, fill = Wave)
  ) +
  geom_col() +
  scale_y_continuous(
    "Responses",
    breaks = pretty_breaks(),
    expand = expansion(c(0, .1)),
  ) +
  scale_x_date(
    "Date",
    date_breaks = "7 day", date_labels = "%b\n%d", date_minor_breaks = "1 day"
  ) +
  facet_wrap("Game", scales = "free_y", ncol = 1)
Histograms of response dates.

Figure 3.1: Histograms of response dates.

Response times

d %>%
  filter(Responded) %>%
  mutate(Hour = hour(StartDate)) %>%
  count(Game, Wave, Hour) %>%
  ggplot(aes(Hour, y = n, fill = Wave)) +
  scale_y_continuous(
    "Responses",
    breaks = pretty_breaks(),
    expand = expansion(c(0, .1)),
  ) +
  scale_x_continuous(
    breaks = seq(0, 21, by = 3),
    expand = expansion(c(0.01))
  ) +
  geom_col() +
  facet_wrap("Game", scales = "free", ncol = 2) +
  theme(legend.position = "bottom")
Histograms of response times (in UTC).

Figure 3.2: Histograms of response times (in UTC).

3.2.4.1 Durations between waves

Participants could respond with variable delays due to variation in email schedules and late responding. So we also check the actual intervals between completing waves. Very small values are possible because a participant could have e.g. completed both waves 2 and 3 in succession after receiving wave 3 invitation. Note that negative values were also possible for this reason but they were excluded before. (This figure is restricted to 5-30 day intervals to display the bulk of the data.)

# Table
d %>%
  select(Wave, Game, interval) %>%
  # group_by(Wave) %>%
  filter(Wave != "Wave 1") %>%
  summarise(
    Value = quantile(
      interval,
      probs = c(0, .10, .25, .5, .75, .90, 1),
      na.rm = T
    ) %>%
      round(3),
    Quantile = percent(c(0, .10, .25, .5, .75, .90, 1))
  ) %>%
  pivot_wider(names_from = Quantile, values_from = Value) %>%
  kbl(caption = "Interval duration percentiles preceding waves 2 and 3.") %>% 
  kable_styling(full_width = FALSE, font_size = 12)
Table 3.4: Interval duration percentiles preceding waves 2 and 3.
0% 10% 25% 50% 75% 90% 100%
0.001 11.649 13.706 14.046 15.941 17.105 49.287
# Figure
d %>%
  filter(Wave != "Wave 1") %>%
  filter(between(interval, 5, 30)) %>% 
  mutate(Wave = fct_drop(Wave)) %>%
  ggplot(aes(interval)) +
  geom_vline(xintercept = 14, size = .2) +
  geom_histogram(binwidth = 1, col = "white") +
  scale_y_continuous(
    "Count",
    expand = expansion(c(0, .1))
  ) +
  scale_x_continuous(
    "Days between responding",
    breaks = pretty_breaks()
  ) +
  facet_grid(Game ~ Wave, scales = "free_y")
Histograms of intervals between participants completing the survey waves (in days).

Figure 3.3: Histograms of intervals between participants completing the survey waves (in days).

3.3 System information

## R version 4.1.3 (2022-03-10)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices datasets  utils     methods   base     
## 
## other attached packages:
##  [1] naniar_0.6.1      lubridate_1.8.0   forcats_0.5.1     stringr_1.4.0    
##  [5] dplyr_1.0.7       purrr_0.3.4       readr_2.0.2       tidyr_1.1.4      
##  [9] tibble_3.1.5      tidyverse_1.3.1   here_1.0.1        kableExtra_1.3.4 
## [13] broom.mixed_0.2.7 lme4_1.1-27.1     Matrix_1.3-4      flextable_0.7.0  
## [17] gtsummary_1.4.2   scales_1.1.1      knitr_1.36        ggplot2_3.3.5    
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-153        fs_1.5.0            bit64_4.0.5        
##  [4] webshot_0.5.2       httr_1.4.2          rprojroot_2.0.2    
##  [7] tools_4.1.3         backports_1.2.1     bslib_0.3.1        
## [10] utf8_1.2.2          R6_2.5.1            DBI_1.1.1          
## [13] colorspace_2.0-2    withr_2.4.2         tidyselect_1.1.1   
## [16] downlit_0.2.1       bit_4.0.4           compiler_4.1.3     
## [19] textshaping_0.3.5   cli_3.0.1           rvest_1.0.1        
## [22] gt_0.3.1            xml2_1.3.2          officer_0.4.1      
## [25] labeling_0.4.2      bookdown_0.24       sass_0.4.0         
## [28] systemfonts_1.0.2   digest_0.6.28       minqa_1.2.4        
## [31] rmarkdown_2.11      svglite_2.0.0       base64enc_0.1-3    
## [34] pkgconfig_2.0.3     htmltools_0.5.2     highr_0.9          
## [37] dbplyr_2.1.1        fastmap_1.1.0       readxl_1.3.1       
## [40] rlang_0.4.11        rstudioapi_0.13     farver_2.1.0       
## [43] jquerylib_0.1.4     generics_0.1.0      jsonlite_1.7.2     
## [46] vroom_1.5.5         zip_2.2.0           magrittr_2.0.1     
## [49] Rcpp_1.0.7          munsell_0.5.0       fansi_0.5.0        
## [52] gdtools_0.2.4       visdat_0.5.3        lifecycle_1.0.1    
## [55] stringi_1.7.5       yaml_2.2.1          MASS_7.3-54        
## [58] grid_4.1.3          parallel_4.1.3      crayon_1.4.1       
## [61] lattice_0.20-45     haven_2.4.3         splines_4.1.3      
## [64] hms_1.1.1           pillar_1.6.3        uuid_0.1-4         
## [67] boot_1.3-28         codetools_0.2-18    reprex_2.0.1       
## [70] glue_1.4.2          evaluate_0.14       modelr_0.1.8       
## [73] data.table_1.14.2   broom.helpers_1.4.0 renv_0.14.0        
## [76] tzdb_0.1.2          vctrs_0.3.8         nloptr_1.2.2.2     
## [79] cellranger_1.1.0    gtable_0.3.0        assertthat_0.2.1   
## [82] xfun_0.26           broom_0.7.9         ragg_1.1.3         
## [85] survival_3.2-13     viridisLite_0.4.0   ellipsis_0.3.2