Chapter 5 Data descriptives
5.1 Data
# Load the three datasets used throughout this chapter from their .rds files.
mtf <- read_rds("data/mtf.rds")
us <- read_rds("data/us.rds")
yrbs <- read_rds("data/yrbs.rds")
5.1.1 Sample sizes and years
Sample sizes per dataset are the number of rows that include the outcome variable (there are two for US). The total sample size in the entire manuscript is the sum without US-emotion (including both US outcomes would duplicate its N).
# Rows with a non-missing outcome, per dataset (US has two outcomes).
tibble(
  mtf = drop_na(mtf, Depression) %>% nrow(),
  us_conduct = drop_na(us, Conduct) %>% nrow(),
  us_emotion = drop_na(us, Emotion) %>% nrow(),
  yrbs = drop_na(yrbs, Suicide) %>% nrow()
) %>%
  # us_emotion is left out of the total so the US sample is counted only once.
  mutate(total = mtf + us_conduct + yrbs) %>%
  # Format every count with thousands separators for display.
  mutate(across(everything(), ~ comma(.))) %>%
  kable()
mtf | us_conduct | us_emotion | yrbs | total |
---|---|---|---|---|
380,924 | 19,190 | 19,184 | 30,447 | 430,561 |
Actual sample sizes used in analyses are the intersections of non-missing values for the two variables (outcome, technology) used in each model.
# Summarise the analytic sample for one technology/outcome pair.
#
# data: data frame with columns `Sex`, `Year` and the columns named by x and y.
# name: label written to the `Dataset` column.
# x:    name (string) of the technology column.
# y:    name (string) of the outcome column.
#
# Returns a one-row table with Dataset, Technology, Outcome, Years, Total,
# Female and Male, where the counts are formatted as strings.
doit <- function(data, name, x, y) {
  # Keep only rows where both the technology and the outcome are observed.
  data <- data %>%
    drop_na(all_of(x), all_of(y))
  # Year span of the retained rows, e.g. "2009 - 2017".
  years <- paste(range(data$Year), collapse = " - ")
  data %>%
    count(Sex) %>%
    pivot_wider(names_from = Sex, values_from = n) %>%
    mutate(Total = Female + Male) %>%
    # Per-sex cells become "count (percent of Total)"; Total must still be
    # numeric at this point.
    mutate(
      across(
        c(Female, Male),
        ~ str_glue("{comma(.)} ({percent(. / Total)})")
      )
    ) %>%
    # Format whatever is still numeric (Total) with thousands separators.
    mutate(across(where(is.numeric), ~ comma(.))) %>%
    mutate(Years = years) %>%
    mutate(Dataset = name, Technology = x, Outcome = y) %>%
    select(Dataset, Technology, Outcome, Years, Total, Female, Male)
}

# One row per dataset x technology x outcome combination.
bind_rows(
  doit(mtf, "MTF", "TV", "Depression"),
  doit(mtf, "MTF", "SM", "Depression"),
  doit(us, "US", "TV", "Emotion"),
  doit(us, "US", "SM", "Emotion"),
  doit(us, "US", "TV", "Conduct"),
  doit(us, "US", "SM", "Conduct"),
  doit(yrbs, "YRBS", "TV", "Suicide"),
  doit(yrbs, "YRBS", "DV", "Suicide")
) %>%
  arrange(Dataset, Technology, Outcome) %>%
  kable()
Dataset | Technology | Outcome | Years | Total | Female | Male |
---|---|---|---|---|---|---|
MTF | SM | Depression | 2009 - 2017 | 120,265 | 61,792 (51%) | 58,473 (49%) |
MTF | TV | Depression | 1991 - 2017 | 367,444 | 191,859 (52%) | 175,585 (48%) |
US | SM | Conduct | 2009 - 2017 | 18,815 | 9,434 (50%) | 9,381 (50%) |
US | SM | Emotion | 2009 - 2017 | 18,811 | 9,432 (50%) | 9,379 (50%) |
US | TV | Conduct | 2009 - 2017 | 19,079 | 9,554 (50%) | 9,525 (50%) |
US | TV | Emotion | 2009 - 2017 | 19,074 | 9,552 (50%) | 9,522 (50%) |
YRBS | DV | Suicide | 2007 - 2017 | 29,450 | 15,424 (52%) | 14,026 (48%) |
YRBS | TV | Suicide | 2007 - 2017 | 29,433 | 15,422 (52%) | 14,011 (48%) |
5.1.2 Missingness
# Number of missing D_B_* items per MTF row, shown as proportions of rows.
mtf_db_na <- rowSums(is.na(select(mtf, starts_with("D_B_"))))
kable(prop.table(table(mtf_db_na)), digits = 3)
mtf_db_na | Freq |
---|---|
0 | 0.949 |
1 | 0.027 |
2 | 0.006 |
3 | 0.005 |
4 | 0.007 |
5 | 0.007 |
# Number of missing sdq* items per US row, shown as proportions of rows.
us_sdq_na <- rowSums(is.na(select(us, starts_with("sdq"))))
kable(prop.table(table(us_sdq_na)), digits = 3)
us_sdq_na | Freq |
---|---|
0 | 0.958 |
1 | 0.025 |
2 | 0.004 |
3 | 0.001 |
4 | 0.001 |
5 | 0.000 |
6 | 0.001 |
7 | 0.000 |
8 | 0.000 |
9 | 0.000 |
10 | 0.010 |
5.1.3 Outcomes
# Reliability summary (psych::alpha) for the MTF D_B_* items.
mtf %>%
  select(starts_with("D_B_")) %>%
  psych::alpha() %>%
  summary()
##
## Reliability analysis
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.84 0.48 5.5 0.00039 2 0.95 0.44
# Pairwise-complete correlations between the MTF D_B_* items, to 2 decimals.
cor(
  select(mtf, starts_with("D_B_")),
  use = "pairwise.complete.obs"
) %>%
  round(2)
## D_B_1 D_B_2 D_B_3 D_B_4 D_B_5 D_B_6
## D_B_1 1.00 0.60 0.48 0.34 0.56 0.34
## D_B_2 0.60 1.00 0.57 0.43 0.63 0.45
## D_B_3 0.48 0.57 1.00 0.39 0.59 0.39
## D_B_4 0.34 0.43 0.39 1.00 0.38 0.64
## D_B_5 0.56 0.63 0.59 0.38 1.00 0.40
## D_B_6 0.34 0.45 0.39 0.64 0.40 1.00
# SDQ item columns grouped by subscale; these vectors are reused below.
sdq_con <- c("sdqe", "sdqg", "sdql", "sdqr", "sdqv")
sdq_emo <- c("sdqc", "sdqh", "sdqm", "sdqp", "sdqx")

# Reliability summary (psych::alpha) for the sdq_con items.
us %>%
  select(all_of(sdq_con)) %>%
  psych::alpha() %>%
  summary()
##
## Reliability analysis
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.62 0.62 0.58 0.25 1.6 0.0041 1.4 0.36 0.24
# Reliability summary (psych::alpha) for the sdq_emo items.
us %>%
  select(all_of(sdq_emo)) %>%
  psych::alpha() %>%
  summary()
##
## Reliability analysis
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.69 0.69 0.65 0.31 2.2 0.0035 1.6 0.45 0.29
# Pairwise-complete correlations between the sdq_con items, to 2 decimals.
cor(
  select(us, all_of(sdq_con)),
  use = "pairwise.complete.obs"
) %>%
  round(2)
## sdqe sdqg sdql sdqr sdqv
## sdqe 1.00 0.30 0.34 0.34 0.16
## sdqg 0.30 1.00 0.23 0.24 0.15
## sdql 0.34 0.23 1.00 0.28 0.21
## sdqr 0.34 0.24 0.28 1.00 0.23
## sdqv 0.16 0.15 0.21 0.23 1.00
# Pairwise-complete correlations between the sdq_emo items, to 2 decimals.
cor(
  select(us, all_of(sdq_emo)),
  use = "pairwise.complete.obs"
) %>%
  round(2)
## sdqc sdqh sdqm sdqp sdqx
## sdqc 1.00 0.26 0.28 0.19 0.21
## sdqh 0.26 1.00 0.42 0.39 0.39
## sdqm 0.28 0.42 1.00 0.29 0.30
## sdqp 0.19 0.39 0.29 1.00 0.37
## sdqx 0.21 0.39 0.30 0.37 1.00
5.2 Figures
# Yearly means of the MTF variables, one facet per variable.
mtf %>%
  pivot_longer(c(TV, SM, Depression)) %>%
  drop_na(value) %>%
  # Attach the per-variable count of non-missing values to the facet label.
  add_count(name) %>%
  mutate(name = str_glue("{name} (N = {comma(n)})")) %>%
  ggplot(aes(Year, value)) +
  geom_blank() +
  stat_summary(fun = mean, geom = "line") +
  facet_wrap("name", scales = "free_y")
# Yearly means of the US variables, one facet per variable.
us %>%
  pivot_longer(c(SM, TV, Emotion, Conduct)) %>%
  drop_na(value) %>%
  # Attach the per-variable count of non-missing values to the facet label.
  add_count(name) %>%
  mutate(name = str_glue("{name} (N = {comma(n, accuracy = 1)})")) %>%
  ggplot(aes(Year, value)) +
  geom_blank() +
  stat_summary(fun = mean, geom = "line") +
  facet_wrap("name", scales = "free_y")
# Yearly means of the YRBS variables, one facet per variable.
yrbs %>%
  pivot_longer(c(TV, DV, sad_lonely:suicide_3)) %>%
  drop_na(value) %>%
  # Attach the per-variable count of non-missing values to the facet label.
  add_count(name) %>%
  mutate(name = str_glue("{name} (N = {comma(n)})")) %>%
  ggplot(aes(Year, value)) +
  geom_blank() +
  stat_summary(fun = mean, geom = "line") +
  facet_wrap("name", scales = "free_y")
# Set the console output width to 120 characters before printing.
options(width = 120)
# Attach sessioninfo and print the R session and package versions.
library(sessioninfo)
session_info()
## ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.3 (2020-10-10)
## os macOS Big Sur 10.16
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_GB.UTF-8
## ctype en_GB.UTF-8
## tz Europe/London
## date 2021-03-01
##
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.0)
## backports 1.2.1 2020-12-09 [1] CRAN (R 4.0.2)
## bookdown 0.21.6 2021-03-01 [1] Github (rstudio/bookdown@ca0145f)
## broom * 0.7.5.9000 2021-03-01 [1] Github (tidymodels/broom@0b3528b)
## bslib 0.2.4 2021-01-25 [1] CRAN (R 4.0.3)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.0.0)
## cli 2.3.1 2021-02-23 [1] CRAN (R 4.0.3)
## colorspace 2.0-0 2020-11-11 [1] CRAN (R 4.0.2)
## crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.3)
## DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.2)
## dbplyr 2.1.0 2021-02-03 [1] CRAN (R 4.0.2)
## digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.2)
## dplyr * 1.0.4 2021-02-02 [1] CRAN (R 4.0.2)
## ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.0)
## evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0)
## fansi 0.4.2 2021-01-15 [1] CRAN (R 4.0.2)
## forcats * 0.5.1 2021-01-27 [1] CRAN (R 4.0.2)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
## generics 0.1.0 2020-10-31 [1] CRAN (R 4.0.2)
## ggplot2 * 3.3.3 2020-12-30 [1] CRAN (R 4.0.2)
## ggstance * 0.3.5 2020-12-17 [1] CRAN (R 4.0.2)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
## gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.0)
## haven 2.3.1 2020-06-01 [1] CRAN (R 4.0.0)
## highr 0.8 2019-03-20 [1] CRAN (R 4.0.0)
## hms 1.0.0 2021-01-13 [1] CRAN (R 4.0.2)
## htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.2)
## httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.2)
## jquerylib 0.1.3 2020-12-17 [1] CRAN (R 4.0.2)
## jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.2)
## knitr * 1.31 2021-01-27 [1] CRAN (R 4.0.2)
## lattice 0.20-41 2020-04-02 [1] CRAN (R 4.0.3)
## lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.2)
## lubridate 1.7.9.2 2020-11-13 [1] CRAN (R 4.0.2)
## magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.2)
## mnormt 2.0.2 2020-09-01 [1] CRAN (R 4.0.2)
## modelr 0.1.8 2020-05-19 [1] CRAN (R 4.0.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.0)
## nlme 3.1-152 2021-02-04 [1] CRAN (R 4.0.2)
## pacman 0.5.1 2019-03-11 [1] CRAN (R 4.0.0)
## pillar 1.5.0 2021-02-22 [1] CRAN (R 4.0.3)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.0)
## psych 2.0.12 2020-12-16 [1] CRAN (R 4.0.2)
## purrr * 0.3.4 2020-04-17 [1] CRAN (R 4.0.0)
## R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.2)
## Rcpp 1.0.6 2021-01-15 [1] CRAN (R 4.0.2)
## readr * 1.4.0 2020-10-05 [1] CRAN (R 4.0.2)
## readxl 1.3.1 2019-03-13 [1] CRAN (R 4.0.0)
## reprex 1.0.0 2021-01-27 [1] CRAN (R 4.0.2)
## rlang 0.4.10 2020-12-30 [1] CRAN (R 4.0.2)
## rmarkdown 2.7.2 2021-03-01 [1] Github (rstudio/rmarkdown@9bfaf4a)
## rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.2)
## rvest 0.3.6 2020-07-25 [1] CRAN (R 4.0.2)
## sass 0.3.1 2021-01-24 [1] CRAN (R 4.0.2)
## scales * 1.1.1 2020-05-11 [1] CRAN (R 4.0.0)
## sessioninfo * 1.1.1 2018-11-05 [1] CRAN (R 4.0.0)
## stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.2)
## stringr * 1.4.0 2019-02-10 [1] CRAN (R 4.0.0)
## tibble * 3.1.0 2021-02-25 [1] CRAN (R 4.0.2)
## tidyr * 1.1.2 2020-08-27 [1] CRAN (R 4.0.2)
## tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.0)
## tidyverse * 1.3.0 2019-11-21 [1] CRAN (R 4.0.0)
## tmvnsim 1.0-2 2016-12-15 [1] CRAN (R 4.0.0)
## utf8 1.1.4 2018-05-24 [1] CRAN (R 4.0.0)
## vctrs 0.3.6 2020-12-17 [1] CRAN (R 4.0.2)
## withr 2.4.1 2021-01-26 [1] CRAN (R 4.0.2)
## xfun 0.21 2021-02-10 [1] CRAN (R 4.0.2)
## xml2 1.3.2 2020-04-23 [1] CRAN (R 4.0.0)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library