Run the full Experiment
pipeline (fitting, evaluating, and visualizing).
Source: R/experiment-helpers.R
run_experiment.Rd
Run the full Experiment
pipeline (fitting, evaluating, and visualizing).
Usage
run_experiment(
experiment,
n_reps = 1,
parallel_strategy = c("reps"),
future.globals = NULL,
future.packages = NULL,
future.seed = TRUE,
use_cached = FALSE,
return_all_cached_reps = FALSE,
save = FALSE,
checkpoint_n_reps = 0,
verbose = 1,
...
)
Arguments
- experiment
An `Experiment` object.
- n_reps
The number of replicates of the `Experiment` for this run.
- parallel_strategy
A vector with some combination of "reps", "dgps", or "methods". Determines how computation will be distributed across available resources. Currently only the default, "reps", is supported.
- future.globals
Character vector of names in the global environment to pass to parallel workers. Passed as the argument of the same name to `future.apply::future_lapply` and related functions. To set for all runs of the experiment, use the same argument during initialization.
- future.packages
Character vector of packages required by parallel workers. Passed as the argument of the same name to `future.apply::future_lapply` and related functions. To set for all runs of the experiment, use the same argument during initialization.
- future.seed
Passed as the argument of the same name in `future.apply::future_apply`.
- use_cached
Logical. If `TRUE`, find and return previously saved results. If cached results cannot be found, continue as if `use_cached` was `FALSE`.
- return_all_cached_reps
Logical. If `FALSE` (default), returns only the fit results for the requested `n_reps`. If `TRUE`, returns fit results for the requested `n_reps` plus any additional cached replicates from the (`DGP`, `Method`) combinations in the `Experiment`. Note that even if `return_all_cached_reps = TRUE`, only the `n_reps` replicates are used when evaluating and visualizing the `Experiment`.
- save
If `TRUE`, save outputs to disk.
- checkpoint_n_reps
The number of experiment replicates to compute before saving results to disk. If 0 (the default), no checkpoints are saved.
- verbose
Level of verbosity. Default is 1, which prints out messages after major checkpoints in the experiment. If 2, prints additional debugging information for warnings and messages from user-defined functions (in addition to error debugging information). If 0, no messages are printed other than user-defined function error debugging information.
- ...
Not used.
Value
A named list of results from the simulation experiment with the following entries:
- fit_results
A tibble containing results from the `fit` method. In addition to results columns, has columns named '.rep', '.dgp_name', '.method_name', and the `vary_across` parameter names if applicable.
- eval_results
A list of tibbles containing results from the `evaluate` method, which evaluates each `Evaluator` in the `Experiment`. Length of list is equivalent to the number of `Evaluators`.
- viz_results
A list of tibbles containing results from the `visualize` method, which visualizes each `Visualizer` in the `Experiment`. Length of list is equivalent to the number of `Visualizers`.
Examples
## create toy DGPs, Methods, Evaluators, and Visualizers
# toy DGP: draw n values with rnorm(); n defaults to 10 for this DGP
normal_dgp <- create_dgp(
  .dgp_fun = function(n) {
    rnorm(n)
  },
  .name = "Normal DGP",
  n = 10
)
# toy DGP: draw n Bernoulli(0.5) values, i.e. binomial draws with size 1;
# n defaults to 10 for this DGP
bernoulli_dgp <- create_dgp(
  .dgp_fun = function(n) {
    rbinom(n, size = 1, prob = 0.5)
  },
  .name = "Bernoulli DGP",
  n = 10
)
# toy Method: return the sample mean of x in a one-element named list
mean_method <- create_method(
  .method_fun = function(x) {
    list(mean = mean(x))
  },
  .name = "Mean(x)"
)
# toy Evaluator: standard deviation of the `mean` column of fit_results,
# computed within each (.dgp_name, .method_name, vary_params) group
sd_mean_eval <- create_evaluator(
  .eval_fun = function(fit_results, vary_params = NULL) {
    group_vars <- c(".dgp_name", ".method_name", vary_params)
    grouped_fits <- dplyr::group_by(
      fit_results,
      dplyr::across(tidyselect::all_of(group_vars))
    )
    # .groups = "keep" leaves the result grouped by the same variables
    dplyr::summarise(grouped_fits, sd = sd(mean), .groups = "keep")
  },
  .name = "SD of Mean(x)"
)
# toy Visualizer: plot the `sd` column of eval_results[[eval_name]].
# Without vary_params: one point per DGP. With vary_params: sd against the
# varied parameter, colored by DGP, with points joined by lines.
sd_mean_plot <- create_visualizer(
  .viz_fun = function(fit_results, eval_results, vary_params = NULL,
                      eval_name = "SD of Mean(x)") {
    has_vary_params <- !is.null(vary_params)
    point_aes <- if (has_vary_params) {
      ggplot2::aes(
        x = .data[[unique(vary_params)]], y = sd, color = .dgp_name
      )
    } else {
      ggplot2::aes(x = .dgp_name, y = sd)
    }
    plt <- ggplot2::ggplot(eval_results[[eval_name]]) +
      point_aes +
      ggplot2::geom_point()
    if (has_vary_params) {
      plt <- plt + ggplot2::geom_line()
    }
    plt
  },
  .name = "SD of Mean(x) Plot"
)
# initialize experiment with toy DGPs, Methods, Evaluators, and Visualizers
# (each list element is named with the same string as the object's .name)
experiment <- create_experiment(
name = "Experiment Name",
dgp_list = list(`Normal DGP` = normal_dgp, `Bernoulli DGP` = bernoulli_dgp),
method_list = list(`Mean(x)` = mean_method),
evaluator_list = list(`SD of Mean(x)` = sd_mean_eval),
visualizer_list = list(`SD of Mean(x) Plot` = sd_mean_plot)
)
# initialize the same (populated, not empty) experiment, additionally passing
# a user-defined directory for saving results via save_dir; this overwrites
# the `experiment` object created above
experiment <- create_experiment(
name = "Experiment Name",
dgp_list = list(`Normal DGP` = normal_dgp, `Bernoulli DGP` = bernoulli_dgp),
method_list = list(`Mean(x)` = mean_method),
evaluator_list = list(`SD of Mean(x)` = sd_mean_eval),
visualizer_list = list(`SD of Mean(x) Plot` = sd_mean_plot),
save_dir = 'path/to/directory'
)
# initialize experiment with toy DGPs, Methods, Evaluators, and Visualizers
# using piping %>%: start from an experiment given only a name, then add each
# component in turn; this again overwrites `experiment`, and no save_dir is
# passed here
experiment <- create_experiment(name = "Experiment Name") %>%
add_dgp(normal_dgp) %>%
add_dgp(bernoulli_dgp) %>%
add_method(mean_method) %>%
add_evaluator(sd_mean_eval) %>%
add_visualizer(sd_mean_plot)
# run experiment with 2 replicates; the lines prefixed with #> are the
# progress messages printed for the fitting, evaluating, and visualizing
# stages (the reported times are from one particular run)
results <- run_experiment(experiment, n_reps = 2)
#> Fitting Experiment Name...
#> 2 reps completed (totals: 2/2) | time taken: 0.044994 minutes
#> ==============================
#> Evaluating Experiment Name...
#> Evaluation completed | time taken: 0.000089 minutes
#> ==============================
#> Visualizing Experiment Name...
#> Visualization completed | time taken: 0.000043 minutes
#> ==============================
# uncomment below to view results
# results
# run experiment with varying number of samples n:
# vary the DGP argument `n` over the values 1 and 10 for both DGPs
experiment <- experiment %>%
add_vary_across(
.dgp = c("Normal DGP", "Bernoulli DGP"), n = c(1, 10)
)
# run vary-across experiment with 2 replicates
results <- run_experiment(experiment, n_reps = 2)
#> Fitting Experiment Name...
#> 2 reps completed (totals: 2/2) | time taken: 0.075092 minutes
#> ==============================
#> Evaluating Experiment Name...
#> Evaluation completed | time taken: 0.000101 minutes
#> ==============================
#> Visualizing Experiment Name...
#> Visualization completed | time taken: 0.000069 minutes
#> ==============================
# uncomment below to view results
# results
# `run_experiment()` above is equivalent to the following sequence of calls:
# fit first, pass the fit results to the evaluation step, then pass both the
# fit and evaluation results to the visualization step
fit_results <- fit_experiment(experiment, n_reps = 2)
#> Fitting Experiment Name...
#> 2 reps completed (totals: 2/2) | time taken: 0.075787 minutes
#> ==============================
eval_results <- evaluate_experiment(experiment, fit_results)
#> Evaluating Experiment Name...
#> Evaluation completed | time taken: 0.000101 minutes
#> ==============================
viz_results <- visualize_experiment(experiment, fit_results, eval_results)
#> Visualizing Experiment Name...
#> Visualization completed | time taken: 0.000070 minutes
#> ==============================
# generate data from all DGPs (and vary across components) in experiment
data_out <- generate_data(experiment, n_reps = 1)