Chapter 5 Data descriptives

5.1 Data

# Load the three datasets used in this chapter from RDS files under data/.
mtf <- read_rds("data/mtf.rds")
us <- read_rds("data/us.rds")
yrbs <- read_rds("data/yrbs.rds")

5.1.1 Sample sizes and years

The sample size for each dataset is the number of rows with a non-missing outcome variable (US has two outcomes, Conduct and Emotion). The total sample size for the entire manuscript is the sum across datasets with US counted only once, via US-Conduct; adding US-Emotion as well would count the US respondents twice.

# Number of rows with a non-missing outcome, per dataset and outcome.
tibble(
  mtf = nrow(drop_na(mtf, Depression)),
  us_conduct = nrow(drop_na(us, Conduct)),
  us_emotion = nrow(drop_na(us, Emotion)),
  yrbs = nrow(drop_na(yrbs, Suicide))
) %>%
  # US enters the total once (via Conduct); us_emotion is left out on purpose.
  mutate(total = mtf + us_conduct + yrbs) %>%
  # Name the columns explicitly: calling across() without `.cols` is
  # deprecated since dplyr 1.1.0.
  mutate(across(everything(), ~ comma(.))) %>%
  kable()
mtf us_conduct us_emotion yrbs total
380,924 19,190 19,184 30,447 430,561

The sample size actually used in each analysis is the number of rows with non-missing values on both variables in the model (the outcome and the technology variable).

#' Summarise the analytic sample for one technology-outcome pair
#'
#' Keeps the rows of `data` that have non-missing values on both `x` and `y`,
#' counts them by `Sex`, and returns the counts as one formatted table row.
#'
#' @param data Data frame with `Sex` and `Year` columns plus the columns
#'   named by `x` and `y`. `Sex` must contain the values "Female" and "Male".
#' @param name Dataset label written to the `Dataset` column.
#' @param x Name (string) of the technology column.
#' @param y Name (string) of the outcome column.
#' @return A one-row tibble with columns Dataset, Technology, Outcome, Years,
#'   Total, Female and Male. Total is a formatted count; Female and Male are
#'   formatted counts followed by their share of Total in parentheses.
doit <- function(data, name, x, y) {
  # Restrict to rows usable in the model: both variables observed.
  data <- data %>%
    drop_na(all_of(x), all_of(y))
  data %>%
    count(Sex) %>%
    pivot_wider(names_from = Sex, values_from = n) %>%
    mutate(Total = Female + Male) %>%
    # Format Female and Male first, while Total is still numeric and can
    # serve as the denominator for the percentages.
    mutate(
      across(
        c(Female, Male),
        ~str_glue("{comma(.)} ({percent(./Total)})")
      )
    ) %>%
    # Female and Male are strings by now, so this formats the remaining
    # numeric columns (Total).
    mutate(across(where(is.numeric), ~comma(.))) %>%
    # na.rm = TRUE: a single missing Year must not turn the label into
    # "NA - NA".
    mutate(
      Years = paste(range(data$Year, na.rm = TRUE), collapse = " - ")
    ) %>%
    mutate(Dataset = name, Technology = x, Outcome = y) %>%
    select(Dataset, Technology, Outcome, Years, Total, Female, Male)
}
# One row per dataset x technology x outcome combination used in the models,
# stacked into a single table and sorted for display.
list(
  doit(data = mtf, name = "MTF", x = "TV", y = "Depression"),
  doit(data = mtf, name = "MTF", x = "SM", y = "Depression"),
  doit(data = us, name = "US", x = "TV", y = "Emotion"),
  doit(data = us, name = "US", x = "SM", y = "Emotion"),
  doit(data = us, name = "US", x = "TV", y = "Conduct"),
  doit(data = us, name = "US", x = "SM", y = "Conduct"),
  doit(data = yrbs, name = "YRBS", x = "TV", y = "Suicide"),
  doit(data = yrbs, name = "YRBS", x = "DV", y = "Suicide")
) %>%
  bind_rows() %>%
  arrange(Dataset, Technology, Outcome) %>%
  kable()
Dataset Technology Outcome Years Total Female Male
MTF SM Depression 2009 - 2017 120,265 61,792 (51%) 58,473 (49%)
MTF TV Depression 1991 - 2017 367,444 191,859 (52%) 175,585 (48%)
US SM Conduct 2009 - 2017 18,815 9,434 (50%) 9,381 (50%)
US SM Emotion 2009 - 2017 18,811 9,432 (50%) 9,379 (50%)
US TV Conduct 2009 - 2017 19,079 9,554 (50%) 9,525 (50%)
US TV Emotion 2009 - 2017 19,074 9,552 (50%) 9,522 (50%)
YRBS DV Suicide 2007 - 2017 29,450 15,424 (52%) 14,026 (48%)
YRBS TV Suicide 2007 - 2017 29,433 15,422 (52%) 14,011 (48%)

5.1.2 Missingness

# Per respondent: how many of the MTF "D_B_" items are missing.
mtf_db_na <- mtf %>%
  select(starts_with("D_B_")) %>%
  is.na() %>%
  rowSums()
# Share of respondents at each number of missing items.
table(mtf_db_na) %>%
  prop.table() %>%
  kable(digits = 3)
mtf_db_na Freq
0 0.949
1 0.027
2 0.006
3 0.005
4 0.007
5 0.007
# Per respondent: how many of the US "sdq" items are missing.
us_sdq_na <- us %>%
  select(starts_with("sdq")) %>%
  is.na() %>%
  rowSums()
# Share of respondents at each number of missing items.
table(us_sdq_na) %>%
  prop.table() %>%
  kable(digits = 3)
us_sdq_na Freq
0 0.958
1 0.025
2 0.004
3 0.001
4 0.001
5 0.000
6 0.001
7 0.000
8 0.000
9 0.000
10 0.010

5.1.3 Outcomes

# Reliability (psych::alpha) of the MTF "D_B_" items.
summary(
  psych::alpha(
    select(mtf, starts_with("D_B_"))
  )
)
## 
## Reliability analysis   
##  raw_alpha std.alpha G6(smc) average_r S/N     ase mean   sd median_r
##       0.85      0.85    0.84      0.48 5.5 0.00039    2 0.95     0.44
# Pairwise-complete correlations between the MTF "D_B_" items, to 2 decimals.
mtf %>%
  select(starts_with("D_B_")) %>%
  cor(use = "pairwise.complete.obs") %>%
  round(digits = 2)
##       D_B_1 D_B_2 D_B_3 D_B_4 D_B_5 D_B_6
## D_B_1  1.00  0.60  0.48  0.34  0.56  0.34
## D_B_2  0.60  1.00  0.57  0.43  0.63  0.45
## D_B_3  0.48  0.57  1.00  0.39  0.59  0.39
## D_B_4  0.34  0.43  0.39  1.00  0.38  0.64
## D_B_5  0.56  0.63  0.59  0.38  1.00  0.40
## D_B_6  0.34  0.45  0.39  0.64  0.40  1.00
# Item columns of the two SDQ subscales analysed for US.
sdq_con <- c("sdqe", "sdqg", "sdql", "sdqr", "sdqv")
sdq_emo <- c("sdqc", "sdqh", "sdqm", "sdqp", "sdqx")
# Reliability (psych::alpha) of the items listed in sdq_con.
summary(
  psych::alpha(
    select(us, all_of(sdq_con))
  )
)
## 
## Reliability analysis   
##  raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.62      0.62    0.58      0.25 1.6 0.0041  1.4 0.36     0.24
# Reliability (psych::alpha) of the items listed in sdq_emo.
summary(
  psych::alpha(
    select(us, all_of(sdq_emo))
  )
)
## 
## Reliability analysis   
##  raw_alpha std.alpha G6(smc) average_r S/N    ase mean   sd median_r
##       0.69      0.69    0.65      0.31 2.2 0.0035  1.6 0.45     0.29
# Pairwise-complete correlations between the sdq_con items, to 2 decimals.
us %>%
  select(all_of(sdq_con)) %>%
  cor(use = "pairwise.complete.obs") %>%
  round(digits = 2)
##      sdqe sdqg sdql sdqr sdqv
## sdqe 1.00 0.30 0.34 0.34 0.16
## sdqg 0.30 1.00 0.23 0.24 0.15
## sdql 0.34 0.23 1.00 0.28 0.21
## sdqr 0.34 0.24 0.28 1.00 0.23
## sdqv 0.16 0.15 0.21 0.23 1.00
# Pairwise-complete correlations between the sdq_emo items, to 2 decimals.
us %>%
  select(all_of(sdq_emo)) %>%
  cor(use = "pairwise.complete.obs") %>%
  round(digits = 2)
##      sdqc sdqh sdqm sdqp sdqx
## sdqc 1.00 0.26 0.28 0.19 0.21
## sdqh 0.26 1.00 0.42 0.39 0.39
## sdqm 0.28 0.42 1.00 0.29 0.30
## sdqp 0.19 0.39 0.29 1.00 0.37
## sdqx 0.21 0.39 0.30 0.37 1.00

5.2 Figures

# Yearly means of the MTF technology and outcome variables, one facet per
# variable; each facet label carries the number of non-missing observations.
mtf %>%
  pivot_longer(c(TV, SM, Depression)) %>%
  drop_na(value) %>%
  add_count(name) %>%
  # accuracy = 1 keeps the Ns as whole numbers. Without it, scales picks the
  # number of decimals from the spread of the counts and can print e.g.
  # "19,190.0" when two facets have similar Ns.
  mutate(name = str_glue("{name} (N = {comma(n, accuracy = 1)})")) %>%
  ggplot(aes(Year, value)) +
  geom_blank() +
  stat_summary(fun = mean, geom = "line") +
  facet_wrap("name", scales = "free_y")

# Yearly means of the US technology and outcome variables, one facet per
# variable; each facet label carries the number of non-missing observations.
us %>%
  pivot_longer(cols = c(SM, TV, Emotion, Conduct)) %>%
  drop_na(value) %>%
  add_count(name) %>%
  mutate(
    name = str_glue("{name} (N = {comma(n, accuracy = 1)})")
  ) %>%
  ggplot(mapping = aes(x = Year, y = value)) +
  geom_blank() +
  stat_summary(fun = mean, geom = "line") +
  facet_wrap(facets = "name", scales = "free_y")

# Yearly means of the YRBS technology variables and the columns from
# sad_lonely through suicide_3, one facet per variable; each facet label
# carries the number of non-missing observations.
yrbs %>%
  pivot_longer(c(TV, DV, sad_lonely:suicide_3)) %>%
  drop_na(value) %>%
  add_count(name) %>%
  # accuracy = 1 keeps the Ns as whole numbers. Without it, scales picks the
  # number of decimals from the spread of the counts and can print decimals
  # when two facets have similar Ns.
  mutate(name = str_glue("{name} (N = {comma(n, accuracy = 1)})")) %>%
  ggplot(aes(Year, value)) +
  geom_blank() +
  stat_summary(fun = mean, geom = "line") +
  facet_wrap("name", scales = "free_y")

# Use a 120-character output width for the listing printed below.
options(width = 120)
# Print the R version, platform details and loaded package versions.
library(sessioninfo)
session_info()
## ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 4.0.3 (2020-10-10)
##  os       macOS Big Sur 10.16         
##  system   x86_64, darwin17.0          
##  ui       X11                         
##  language (EN)                        
##  collate  en_GB.UTF-8                 
##  ctype    en_GB.UTF-8                 
##  tz       Europe/London               
##  date     2021-03-01                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
##  package     * version    date       lib source                            
##  assertthat    0.2.1      2019-03-21 [1] CRAN (R 4.0.0)                    
##  backports     1.2.1      2020-12-09 [1] CRAN (R 4.0.2)                    
##  bookdown      0.21.6     2021-03-01 [1] Github (rstudio/bookdown@ca0145f) 
##  broom       * 0.7.5.9000 2021-03-01 [1] Github (tidymodels/broom@0b3528b) 
##  bslib         0.2.4      2021-01-25 [1] CRAN (R 4.0.3)                    
##  cellranger    1.1.0      2016-07-27 [1] CRAN (R 4.0.0)                    
##  cli           2.3.1      2021-02-23 [1] CRAN (R 4.0.3)                    
##  colorspace    2.0-0      2020-11-11 [1] CRAN (R 4.0.2)                    
##  crayon        1.4.1      2021-02-08 [1] CRAN (R 4.0.3)                    
##  DBI           1.1.1      2021-01-15 [1] CRAN (R 4.0.2)                    
##  dbplyr        2.1.0      2021-02-03 [1] CRAN (R 4.0.2)                    
##  digest        0.6.27     2020-10-24 [1] CRAN (R 4.0.2)                    
##  dplyr       * 1.0.4      2021-02-02 [1] CRAN (R 4.0.2)                    
##  ellipsis      0.3.1      2020-05-15 [1] CRAN (R 4.0.0)                    
##  evaluate      0.14       2019-05-28 [1] CRAN (R 4.0.0)                    
##  fansi         0.4.2      2021-01-15 [1] CRAN (R 4.0.2)                    
##  forcats     * 0.5.1      2021-01-27 [1] CRAN (R 4.0.2)                    
##  fs            1.5.0      2020-07-31 [1] CRAN (R 4.0.2)                    
##  generics      0.1.0      2020-10-31 [1] CRAN (R 4.0.2)                    
##  ggplot2     * 3.3.3      2020-12-30 [1] CRAN (R 4.0.2)                    
##  ggstance    * 0.3.5      2020-12-17 [1] CRAN (R 4.0.2)                    
##  glue          1.4.2      2020-08-27 [1] CRAN (R 4.0.2)                    
##  gtable        0.3.0      2019-03-25 [1] CRAN (R 4.0.0)                    
##  haven         2.3.1      2020-06-01 [1] CRAN (R 4.0.0)                    
##  highr         0.8        2019-03-20 [1] CRAN (R 4.0.0)                    
##  hms           1.0.0      2021-01-13 [1] CRAN (R 4.0.2)                    
##  htmltools     0.5.1.1    2021-01-22 [1] CRAN (R 4.0.2)                    
##  httr          1.4.2      2020-07-20 [1] CRAN (R 4.0.2)                    
##  jquerylib     0.1.3      2020-12-17 [1] CRAN (R 4.0.2)                    
##  jsonlite      1.7.2      2020-12-09 [1] CRAN (R 4.0.2)                    
##  knitr       * 1.31       2021-01-27 [1] CRAN (R 4.0.2)                    
##  lattice       0.20-41    2020-04-02 [1] CRAN (R 4.0.3)                    
##  lifecycle     1.0.0      2021-02-15 [1] CRAN (R 4.0.2)                    
##  lubridate     1.7.9.2    2020-11-13 [1] CRAN (R 4.0.2)                    
##  magrittr      2.0.1      2020-11-17 [1] CRAN (R 4.0.2)                    
##  mnormt        2.0.2      2020-09-01 [1] CRAN (R 4.0.2)                    
##  modelr        0.1.8      2020-05-19 [1] CRAN (R 4.0.0)                    
##  munsell       0.5.0      2018-06-12 [1] CRAN (R 4.0.0)                    
##  nlme          3.1-152    2021-02-04 [1] CRAN (R 4.0.2)                    
##  pacman        0.5.1      2019-03-11 [1] CRAN (R 4.0.0)                    
##  pillar        1.5.0      2021-02-22 [1] CRAN (R 4.0.3)                    
##  pkgconfig     2.0.3      2019-09-22 [1] CRAN (R 4.0.0)                    
##  psych         2.0.12     2020-12-16 [1] CRAN (R 4.0.2)                    
##  purrr       * 0.3.4      2020-04-17 [1] CRAN (R 4.0.0)                    
##  R6            2.5.0      2020-10-28 [1] CRAN (R 4.0.2)                    
##  Rcpp          1.0.6      2021-01-15 [1] CRAN (R 4.0.2)                    
##  readr       * 1.4.0      2020-10-05 [1] CRAN (R 4.0.2)                    
##  readxl        1.3.1      2019-03-13 [1] CRAN (R 4.0.0)                    
##  reprex        1.0.0      2021-01-27 [1] CRAN (R 4.0.2)                    
##  rlang         0.4.10     2020-12-30 [1] CRAN (R 4.0.2)                    
##  rmarkdown     2.7.2      2021-03-01 [1] Github (rstudio/rmarkdown@9bfaf4a)
##  rstudioapi    0.13       2020-11-12 [1] CRAN (R 4.0.2)                    
##  rvest         0.3.6      2020-07-25 [1] CRAN (R 4.0.2)                    
##  sass          0.3.1      2021-01-24 [1] CRAN (R 4.0.2)                    
##  scales      * 1.1.1      2020-05-11 [1] CRAN (R 4.0.0)                    
##  sessioninfo * 1.1.1      2018-11-05 [1] CRAN (R 4.0.0)                    
##  stringi       1.5.3      2020-09-09 [1] CRAN (R 4.0.2)                    
##  stringr     * 1.4.0      2019-02-10 [1] CRAN (R 4.0.0)                    
##  tibble      * 3.1.0      2021-02-25 [1] CRAN (R 4.0.2)                    
##  tidyr       * 1.1.2      2020-08-27 [1] CRAN (R 4.0.2)                    
##  tidyselect    1.1.0      2020-05-11 [1] CRAN (R 4.0.0)                    
##  tidyverse   * 1.3.0      2019-11-21 [1] CRAN (R 4.0.0)                    
##  tmvnsim       1.0-2      2016-12-15 [1] CRAN (R 4.0.0)                    
##  utf8          1.1.4      2018-05-24 [1] CRAN (R 4.0.0)                    
##  vctrs         0.3.6      2020-12-17 [1] CRAN (R 4.0.2)                    
##  withr         2.4.1      2021-01-26 [1] CRAN (R 4.0.2)                    
##  xfun          0.21       2021-02-10 [1] CRAN (R 4.0.2)                    
##  xml2          1.3.2      2020-04-23 [1] CRAN (R 4.0.0)                    
##  yaml          2.2.1      2020-02-01 [1] CRAN (R 4.0.0)                    
## 
## [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library