1 Setting-up
In this section, I load all libraries and define all custom functions that we need for data processing and analysis.
Note I use the pacman package for loading libraries.
# pacman makes it easier to load and install packages
library(pacman)
# load packages
p_load(
tidyverse,
here,
ggridges,
cowplot,
kableExtra,
directlabels,
GGally,
lavaan,
brms,
ggbeeswarm,
osfr
)
# set seed
set.seed(42)
# set theme
theme_set(theme_cowplot())
# custom colors that are color blind friendly
cb_palette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# chunk options
knitr::opts_chunk$set(
message = FALSE,
warning = FALSE
)Below the functions I use throughout the script. They’re usually explained in the section where I use them.
dens_with_points <-
function(
data,
variable
) {
p <-
ggplot(data, aes_string(x = variable, y = 0)) +
geom_density_ridges(
jittered_points = TRUE,
position = "raincloud",
fill = "darkslateblue",
point_color = "darkslateblue",
color = "darkslateblue",
alpha = 0.5
) +
theme_cowplot() +
theme(
axis.line=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.y=element_blank(),
axis.title.x=element_blank(),
legend.position="none",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank()
)
return(p)
}
# function that applies the exclusion criterion, taking the number of waves as argument plus the list of to be excluded participants
apply_exclusion <-
function(
wave_number,
exclusion_criterion
){
N <-
working_file %>%
filter(id %in% (waves_per_id %>% filter(n >= wave_number) %>% pull(id))) %>% # select only those with at least this many waves
group_by(id) %>%
slice(1) %>%
filter(!id %in% exclusion_criterion) %>%
nrow()
return(N)
}
# the same function as above, but this time applying all exclusions at the same time
apply_all_exclusions <-
function(
wave_number
){
N <-
working_file %>%
filter(id %in% (waves_per_id %>% filter(n >= wave_number) %>% pull(id))) %>%
group_by(id) %>%
slice(1) %>%
filter(!id %in% exclusions_pp) %>%
nrow()
return(N)
}
# raincloud plot function from https://github.com/RainCloudPlots/RainCloudPlots/blob/master/tutorial_R/R_rainclouds.R
# Defining the geom_flat_violin function ----
# Note: the below code modifies the
# existing github page by removing a parenthesis in line 50
"%||%" <- function(a, b) {
if (!is.null(a)) a else b
}
geom_flat_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
position = "dodge", trim = TRUE, scale = "area",
show.legend = NA, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomFlatViolin,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
...
)
)
}
#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
GeomFlatViolin <-
ggproto("GeomFlatViolin", Geom,
setup_data = function(data, params) {
data$width <- data$width %||%
params$width %||% (resolution(data$x, FALSE) * 0.9)
# ymin, ymax, xmin, and xmax define the bounding rectangle for each group
data %>%
group_by(group) %>%
mutate(
ymin = min(y),
ymax = max(y),
xmin = x,
xmax = x + width / 2
)
},
draw_group = function(data, panel_scales, coord) {
# Find the points for the line to go all the way around
data <- transform(data,
xminv = x,
xmaxv = x + violinwidth * (xmax - x)
)
# Make sure it's sorted properly to draw the outline
newdata <- rbind(
plyr::arrange(transform(data, x = xminv), y),
plyr::arrange(transform(data, x = xmaxv), -y)
)
# Close the polygon: set first and last point the same
# Needed for coord_polar and such
newdata <- rbind(newdata, newdata[1, ])
ggplot2:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord))
},
draw_key = draw_key_polygon,
default_aes = aes(
weight = 1, colour = "grey20", fill = "white", size = 0.5,
alpha = NA, linetype = "solid"
),
required_aes = c("x", "y")
)
# function that returns summary stats
describe <- function(
dat,
variable,
trait = FALSE
){
# if variable is not repeated-measures, take only one measure per participant
if (trait == TRUE){
dat <-
dat %>%
group_by(id) %>%
slice(1) %>%
ungroup()
}
# then get descriptives
descriptives <-
dat %>%
filter(!is.na(UQ(sym(variable)))) %>% # remove missing values
summarise(
across(
!! variable,
list(
N = ~ n(),
mean = mean,
sd = sd,
median = median,
min = min,
max = max,
cilow = ~Rmisc::CI(.x)[[3]], # lower CI
cihigh = ~Rmisc::CI(.x)[[1]] # upper CI
)
)
)
descriptives <-
descriptives %>%
# only keep measure
rename_all(
~ str_remove(
.,
paste0(variable, "_")
)
) %>%
mutate(
variable = variable,
range = max - min
) %>%
relocate(variable) %>%
relocate(
range,
.after = max
)
return(descriptives)
}
# a single raincloud plot
single_cloud <-
function(
raw_data,
summary_data,
variable,
color,
title,
trait = FALSE
){
# take only one row per person if it's a trait variable
if (trait == TRUE){
raw_data <-
raw_data %>%
group_by(id) %>%
slice(1) %>%
ungroup()
}
# the plot
p <-
ggplot(
raw_data %>%
mutate(Density = 1),
aes(
x = Density,
y = get(variable)
)
) +
geom_flat_violin( # the "cloud"
position = position_nudge(x = .2, y = 0),
adjust = 2,
color = NA,
fill = color,
alpha = 0.5
) +
geom_point( # the "rain"
position = position_jitter(width = .15),
size = 1,
color = color,
alpha = 0.5
) +
geom_point( # the mean from the summary stats
data = summary_data %>%
filter(variable == !! variable) %>%
mutate(Density = 1),
aes(
x = Density + 0.175,
y = mean
),
color = color,
size = 2.5
) +
geom_errorbar( # error bars
data = summary_data %>%
filter(variable == !! variable) %>%
mutate(Density = 1),
aes(
x = Density + 0.175,
y = mean,
ymin = cilow,
ymax = cihigh
),
width = 0,
size = 0.8,
color = color
) +
ylab(title) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
axis.line = element_blank()
) +
guides(
color = FALSE,
fill = FALSE
) +
coord_flip()
return(p)
}
lm_function <-
function(
data,
mapping,
...
){
p <-
ggplot(
data = data,
mapping = mapping
) +
geom_point(
color = "#56B4E9",
alpha = 0.5
) +
geom_smooth(
method=lm,
fill="#0072B2",
color="#0072B2",
...)
p
}
dens_function <-
function(
data,
mapping,
...
){
p <-
ggplot(
data = data,
mapping = mapping
) +
geom_density(fill = "#009E73", color = NA, alpha = 0.5)
}
model_diagnostics <-
function(
model
){
plot_grid(
pp_check(
model,
type = "dens_overlay",
nsamples = 100
),
pp_check(
model,
type = "loo_pit_qq",
nsamples = 100
),
pp_check(
model,
type = "loo_pit_overlay",
nsamples = 100
),
pp_check(
model,
type = "stat",
stat = "median",
nsamples = 100
),
pp_check(
model,
type = "stat",
stat = "mean",
nsamples = 100
),
labels = c("Density overlay", "LOO-PIT QQ", "LOO-PIT Uniform", "Predicted medians", "Predicted means"),
ncol = 2,
label_size = 8,
hjust = 0,
vjust = 0,
label_x = 0,
label_y = 0.93
)
}