Generating minimal data to test teal.modules.clinical

The following script is used to create and save cached synthetic CDISC data to the data/ directory to use in examples and tests in the teal.modules.clinical package. This script/vignette was initialized by Emily de la Rua in tern.

Disclaimer: this vignette concerns mainly the development of minimal and stable test data and it is kept internal for feature tracking.

Setup & Helper Functions

library(dplyr)
library(teal.data)

study_duration_secs <- lubridate::seconds(lubridate::years(2))

sample_fct <- function(x, N, ...) { 
  checkmate::assert_number(N)
  factor(sample(x, N, replace = TRUE, ...), levels = if (is.factor(x)) levels(x) else x)
}

retain <- function(df, value_var, event, outside = NA) {
  indices <- c(1, which(event == TRUE), nrow(df) + 1)
  values <- c(outside, value_var[event == TRUE])
  rep(values, diff(indices))
}

relvar_init <- function(relvar1, relvar2) {
  if (length(relvar1) != length(relvar2)) {
    message(simpleError(
      "The argument value length of relvar1 and relvar2 differ. They must contain the same number of elements."
    ))
    return(NA)
  }
  return(list("relvar1" = relvar1, "relvar2" = relvar2))
}

rel_var <- function(df = NULL, var_name = NULL, var_values = NULL, related_var = NULL) {
  if (is.null(df)) {
    message("Missing data frame argument value.")
    return(NA)
  } else {
    n_relvar1 <- length(unique(df[, related_var, drop = TRUE]))
    n_relvar2 <- length(var_values)
    if (n_relvar1 != n_relvar2) {
      message(paste("Unequal vector lengths for", related_var, "and", var_name))
      return(NA)
    } else {
      relvar1 <- unique(df[, related_var, drop = TRUE])
      relvar2_values <- rep(NA, nrow(df))
      for (r in seq_len(length(relvar1))) {
        matched <- which(df[, related_var, drop = TRUE] == relvar1[r])
        relvar2_values[matched] <- var_values[r]
      }
      return(relvar2_values)
    }
  }
}

visit_schedule <- function(visit_format = "WEEK",
                           n_assessments = 10L,
                           n_days = 5L) {
  if (!(toupper(visit_format) %in% c("WEEK", "CYCLE"))) {
    message("Visit format value must either be: WEEK or CYCLE")
    return(NA)
  }
  if (toupper(visit_format) == "WEEK") {
    assessments <- 1:n_assessments
    assessments_ord <- -1:n_assessments
    visit_values <- c("SCREENING", "BASELINE", paste(toupper(visit_format), assessments, "DAY", (assessments * 7) + 1))
  } else if (toupper(visit_format) == "CYCLE") {
    cycles <- sort(rep(1:n_assessments, times = 1, each = n_days))
    days <- rep(seq(1:n_days), times = n_assessments, each = 1)
    assessments_ord <- 0:(n_assessments * n_days)
    visit_values <- c("SCREENING", paste(toupper(visit_format), cycles, "DAY", days))
  }
  visit_values <- stats::reorder(factor(visit_values), assessments_ord)
}

rtpois <- function(n, lambda) stats::qpois(stats::runif(n, stats::dpois(0, lambda), 1), lambda)

rtexp <- function(n, rate, l = NULL, r = NULL) {
  if (!