1 Setting-up

In this section, I load all libraries and define all custom functions that we need for data processing and analysis. Note I use the pacman package for loading libraries.

# pacman makes it easier to load and install packages
library(pacman)

# load packages
p_load(
  tidyverse,
  here,
  ggridges,
  cowplot,
  kableExtra,
  directlabels,
  GGally,
  lavaan,
  brms,
  ggbeeswarm,
  osfr
)

# set seed
set.seed(42)

# set theme
theme_set(theme_cowplot())

# custom colors that are color blind friendly
cb_palette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# chunk options
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE
)

Below the functions I use throughout the script. They’re usually explained in the section where I use them.

dens_with_points <- 
  function(
    data,
    variable
  ) {
    p <- 
      ggplot(data, aes_string(x = variable, y = 0)) +
      geom_density_ridges(
        jittered_points = TRUE, 
        position = "raincloud", 
        fill = "darkslateblue", 
        point_color = "darkslateblue", 
        color = "darkslateblue", 
        alpha = 0.5
      ) +
      theme_cowplot() +
      theme(
        axis.line=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank(),
        axis.title.y=element_blank(),
        axis.title.x=element_blank(),
        legend.position="none",
        panel.background=element_blank(),
        panel.border=element_blank(),
        panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),
        plot.background=element_blank()
      )
    
    return(p)
  }

# function that applies the exclusion criterion, taking the number of waves as argument plus the list of to be excluded participants
apply_exclusion <- 
  function(
    wave_number,
    exclusion_criterion
  ){
    N <- 
      working_file %>% 
      filter(id %in% (waves_per_id %>% filter(n >= wave_number) %>% pull(id))) %>% # select only those with at least this many waves
      group_by(id) %>%
      slice(1) %>% 
      filter(!id %in% exclusion_criterion) %>%
      nrow()
    
    return(N)
  }

# the same function as above, but this time applying all exclusions at the same time
apply_all_exclusions <- 
  function(
    wave_number
  ){
    N <- 
      working_file %>% 
      filter(id %in% (waves_per_id %>% filter(n >= wave_number) %>% pull(id))) %>% 
      group_by(id) %>% 
      slice(1) %>% 
      filter(!id %in% exclusions_pp) %>% 
      nrow()
    
    return(N)
  }

# raincloud plot function from https://github.com/RainCloudPlots/RainCloudPlots/blob/master/tutorial_R/R_rainclouds.R
# Defining the geom_flat_violin function ----
# Note: the below code modifies the
# existing github page by removing a parenthesis in line 50

"%||%" <- function(a, b) {
  if (!is.null(a)) a else b
}

geom_flat_violin <- function(mapping = NULL, data = NULL, stat = "ydensity",
                             position = "dodge", trim = TRUE, scale = "area",
                             show.legend = NA, inherit.aes = TRUE, ...) {
  layer(
    data = data,
    mapping = mapping,
    stat = stat,
    geom = GeomFlatViolin,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = list(
      trim = trim,
      scale = scale,
      ...
    )
  )
}

#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
GeomFlatViolin <-
  ggproto("GeomFlatViolin", Geom,
    setup_data = function(data, params) {
      data$width <- data$width %||%
        params$width %||% (resolution(data$x, FALSE) * 0.9)

      # ymin, ymax, xmin, and xmax define the bounding rectangle for each group
      data %>%
        group_by(group) %>%
        mutate(
          ymin = min(y),
          ymax = max(y),
          xmin = x,
          xmax = x + width / 2
        )
    },

    draw_group = function(data, panel_scales, coord) {
      # Find the points for the line to go all the way around
      data <- transform(data,
        xminv = x,
        xmaxv = x + violinwidth * (xmax - x)
      )

      # Make sure it's sorted properly to draw the outline
      newdata <- rbind(
        plyr::arrange(transform(data, x = xminv), y),
        plyr::arrange(transform(data, x = xmaxv), -y)
      )

      # Close the polygon: set first and last point the same
      # Needed for coord_polar and such
      newdata <- rbind(newdata, newdata[1, ])

      ggplot2:::ggname("geom_flat_violin", GeomPolygon$draw_panel(newdata, panel_scales, coord))
    },

    draw_key = draw_key_polygon,

    default_aes = aes(
      weight = 1, colour = "grey20", fill = "white", size = 0.5,
      alpha = NA, linetype = "solid"
    ),

    required_aes = c("x", "y")
  )

# function that returns summary stats
describe <- function(
  dat,
  variable,
  trait = FALSE
){
  # if variable is not repeated-measures, take only one measure per participant
  if (trait == TRUE){
    dat <- 
      dat %>%
      group_by(id) %>% 
      slice(1) %>% 
      ungroup()
  }
  
  # then get descriptives
  descriptives <-
    dat %>%
    filter(!is.na(UQ(sym(variable)))) %>% # remove missing values
    summarise(
      across(
        !! variable,
        list(
          N = ~ n(),
          mean = mean,
          sd = sd,
          median = median,
          min = min,
          max = max,
          cilow = ~Rmisc::CI(.x)[[3]], # lower CI
          cihigh = ~Rmisc::CI(.x)[[1]] # upper CI
        )
      )
    )

  descriptives <-
    descriptives %>%

    # only keep measure
    rename_all(
      ~ str_remove(
        .,
        paste0(variable, "_")
      )
    ) %>%
    mutate(
      variable = variable,
      range = max - min
    ) %>%
    relocate(variable) %>%
    relocate(
      range,
      .after = max
    )
  
  return(descriptives)
}

# a single raincloud plot
single_cloud <- 
  function(
    raw_data,
    summary_data,
    variable,
    color,
    title,
    trait = FALSE
  ){
    
    # take only one row per person if it's a trait variable
    if (trait == TRUE){
      raw_data <-
        raw_data %>% 
        group_by(id) %>% 
        slice(1) %>% 
        ungroup()
    }
    
    # the plot
    p <- 
      ggplot(
        raw_data %>%
          mutate(Density = 1),
        aes(
          x = Density,
          y = get(variable)
        )
      ) +
      geom_flat_violin( # the "cloud"
        position = position_nudge(x = .2, y = 0),
        adjust = 2,
        color = NA,
        fill = color,
        alpha = 0.5
      ) +
      geom_point( # the "rain"
        position = position_jitter(width = .15),
        size = 1,
        color = color,
        alpha = 0.5
      ) +
      geom_point( # the mean from the summary stats
        data = summary_data %>%
          filter(variable == !! variable) %>%
          mutate(Density = 1),
        aes(
          x = Density + 0.175,
          y = mean
        ),
        color = color,
        size = 2.5
      ) +
      geom_errorbar( # error bars
        data = summary_data %>%
          filter(variable == !! variable) %>%
          mutate(Density = 1),
        aes(
          x = Density + 0.175,
          y = mean,
          ymin = cilow,
          ymax = cihigh
        ),
        width = 0,
        size = 0.8,
        color = color
      ) +
      ylab(title) +
      theme(
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.y = element_blank(),
        axis.line = element_blank()
      ) +
      guides(
        color = FALSE,
        fill = FALSE
      ) +
      coord_flip()
    
    return(p)
  }

lm_function <- 
  function(
    data, 
    mapping, 
    ...
    ){
  p <- 
    ggplot(
      data = data, 
      mapping = mapping
      ) + 
    geom_point(
      color = "#56B4E9",
      alpha = 0.5
    ) + 
    geom_smooth(
      method=lm, 
      fill="#0072B2", 
      color="#0072B2", 
      ...)
  p
}

dens_function <-
  function(
    data,
    mapping,
    ...
  ){
    p <- 
      ggplot(
        data = data,
        mapping = mapping
      ) +
      geom_density(fill = "#009E73", color = NA, alpha = 0.5)
  }

model_diagnostics <- 
  function(
    model
  ){
  plot_grid(
    pp_check(
      model,
      type = "dens_overlay",
      nsamples = 100
    ),
    pp_check(
      model,
      type = "loo_pit_qq",
      nsamples = 100
    ),
    pp_check(
      model,
      type = "loo_pit_overlay",
      nsamples = 100
    ),
    pp_check(
      model,
      type = "stat",
      stat = "median",
      nsamples = 100
    ),
    pp_check(
      model,
      type = "stat",
      stat = "mean",
      nsamples = 100
    ),
    labels = c("Density overlay", "LOO-PIT QQ", "LOO-PIT Uniform", "Predicted medians", "Predicted means"),
    ncol = 2,
    label_size = 8,
    hjust = 0,
    vjust = 0,
    label_x = 0,
    label_y = 0.93
  )
  }