1 Setting-up
In this section, I load all libraries and define all custom functions that we need for data processing and analysis.
Note I use the pacman
package for loading libraries.
# pacman makes it easier to load and install packages
library(pacman)
# load packages
p_load(
tidyverse,
here,
ggridges,
cowplot,
kableExtra,
directlabels,
GGally,
lavaan,
brms,
ggbeeswarm,
osfr
)
# set seed
set.seed(42)
# set theme
theme_set(theme_cowplot())
# custom colors that are color blind friendly
<- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
cb_palette
# chunk options
::opts_chunk$set(
knitrmessage = FALSE,
warning = FALSE
)
Below the functions I use throughout the script. They’re usually explained in the section where I use them.
<-
dens_with_points function(
data,
variable
) {<-
p ggplot(data, aes_string(x = variable, y = 0)) +
geom_density_ridges(
jittered_points = TRUE,
position = "raincloud",
fill = "darkslateblue",
point_color = "darkslateblue",
color = "darkslateblue",
alpha = 0.5
+
) theme_cowplot() +
theme(
axis.line=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.y=element_blank(),
axis.title.x=element_blank(),
legend.position="none",
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank()
)
return(p)
}
# function that applies the exclusion criterion, taking the number of waves as argument plus the list of to be excluded participants
<-
apply_exclusion function(
wave_number,
exclusion_criterion
){<-
N %>%
working_file filter(id %in% (waves_per_id %>% filter(n >= wave_number) %>% pull(id))) %>% # select only those with at least this many waves
group_by(id) %>%
slice(1) %>%
filter(!id %in% exclusion_criterion) %>%
nrow()
return(N)
}
# the same function as above, but this time applying all exclusions at the same time
<-
apply_all_exclusions function(
wave_number
){<-
N %>%
working_file filter(id %in% (waves_per_id %>% filter(n >= wave_number) %>% pull(id))) %>%
group_by(id) %>%
slice(1) %>%
filter(!id %in% exclusions_pp) %>%
nrow()
return(N)
}
# raincloud plot function from https://github.com/RainCloudPlots/RainCloudPlots/blob/master/tutorial_R/R_rainclouds.R
# Defining the geom_flat_violin function ----
# Note: the below code modifies the
# existing github page by removing a parenthesis in line 50
"%||%" <- function(a, b) {
if (!is.null(a)) a else b
}
<- function(mapping = NULL, data = NULL, stat = "ydensity",
geom_flat_violin position = "dodge", trim = TRUE, scale = "area",
show.legend = NA, inherit.aes = TRUE, ...) {
layer(
data = data,
mapping = mapping,
stat = stat,
geom = GeomFlatViolin,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
trim = trim,
scale = scale,
...
)
)
}
#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
<-
GeomFlatViolin ggproto("GeomFlatViolin", Geom,
setup_data = function(data, params) {
$width <- data$width %||%
data$width %||% (resolution(data$x, FALSE) * 0.9)
params
# ymin, ymax, xmin, and xmax define the bounding rectangle for each group
%>%
data group_by(group) %>%
mutate(
ymin = min(y),
ymax = max(y),
xmin = x,
xmax = x + width / 2
)
},
draw_group = function(data, panel_scales, coord) {
# Find the points for the line to go all the way around
<- transform(data,
data xminv = x,
xmaxv = x + violinwidth * (xmax - x)
)
# Make sure it's sorted properly to draw the outline
<- rbind(
newdata ::arrange(transform(data, x = xminv), y),
plyr::arrange(transform(data, x = xmaxv), -y)
plyr
)
# Close the polygon: set first and last point the same
# Needed for coord_polar and such
<- rbind(newdata, newdata[1, ])
newdata
:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord))
ggplot2
},
draw_key = draw_key_polygon,
default_aes = aes(
weight = 1, colour = "grey20", fill = "white", size = 0.5,
alpha = NA, linetype = "solid"
),
required_aes = c("x", "y")
)
# function that returns summary stats
<- function(
describe
dat,
variable,trait = FALSE
){# if variable is not repeated-measures, take only one measure per participant
if (trait == TRUE){
<-
dat %>%
dat group_by(id) %>%
slice(1) %>%
ungroup()
}
# then get descriptives
<-
descriptives %>%
dat filter(!is.na(UQ(sym(variable)))) %>% # remove missing values
summarise(
across(
!! variable,
list(
N = ~ n(),
mean = mean,
sd = sd,
median = median,
min = min,
max = max,
cilow = ~Rmisc::CI(.x)[[3]], # lower CI
cihigh = ~Rmisc::CI(.x)[[1]] # upper CI
)
)
)
<-
descriptives %>%
descriptives
# only keep measure
rename_all(
~ str_remove(
.,paste0(variable, "_")
)%>%
) mutate(
variable = variable,
range = max - min
%>%
) relocate(variable) %>%
relocate(
range,.after = max
)
return(descriptives)
}
# a single raincloud plot
<-
single_cloud function(
raw_data,
summary_data,
variable,
color,
title,trait = FALSE
){
# take only one row per person if it's a trait variable
if (trait == TRUE){
<-
raw_data %>%
raw_data group_by(id) %>%
slice(1) %>%
ungroup()
}
# the plot
<-
p ggplot(
%>%
raw_data mutate(Density = 1),
aes(
x = Density,
y = get(variable)
)+
) geom_flat_violin( # the "cloud"
position = position_nudge(x = .2, y = 0),
adjust = 2,
color = NA,
fill = color,
alpha = 0.5
+
) geom_point( # the "rain"
position = position_jitter(width = .15),
size = 1,
color = color,
alpha = 0.5
+
) geom_point( # the mean from the summary stats
data = summary_data %>%
filter(variable == !! variable) %>%
mutate(Density = 1),
aes(
x = Density + 0.175,
y = mean
),color = color,
size = 2.5
+
) geom_errorbar( # error bars
data = summary_data %>%
filter(variable == !! variable) %>%
mutate(Density = 1),
aes(
x = Density + 0.175,
y = mean,
ymin = cilow,
ymax = cihigh
),width = 0,
size = 0.8,
color = color
+
) ylab(title) +
theme(
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_blank(),
axis.line = element_blank()
+
) guides(
color = FALSE,
fill = FALSE
+
) coord_flip()
return(p)
}
<-
lm_function function(
data,
mapping,
...
){<-
p ggplot(
data = data,
mapping = mapping
+
) geom_point(
color = "#56B4E9",
alpha = 0.5
+
) geom_smooth(
method=lm,
fill="#0072B2",
color="#0072B2",
...)
p
}
<-
dens_function function(
data,
mapping,
...
){<-
p ggplot(
data = data,
mapping = mapping
+
) geom_density(fill = "#009E73", color = NA, alpha = 0.5)
}
<-
model_diagnostics function(
model
){plot_grid(
pp_check(
model,type = "dens_overlay",
nsamples = 100
),pp_check(
model,type = "loo_pit_qq",
nsamples = 100
),pp_check(
model,type = "loo_pit_overlay",
nsamples = 100
),pp_check(
model,type = "stat",
stat = "median",
nsamples = 100
),pp_check(
model,type = "stat",
stat = "mean",
nsamples = 100
),labels = c("Density overlay", "LOO-PIT QQ", "LOO-PIT Uniform", "Predicted medians", "Predicted means"),
ncol = 2,
label_size = 8,
hjust = 0,
vjust = 0,
label_x = 0,
label_y = 0.93
) }