These functions suggest starter code for a few common types of simulation experiments, namely prediction, feature selection, and inference. Each prints minimal template code to the console, ready to be copied into a script and filled in.

Usage

use_prediction_template(
  experiment_name = "Prediction Experiment",
  type = c("regression", "classification"),
  support = FALSE,
  include_dgp_example = FALSE,
  include_method_example = FALSE
)

use_feature_selection_template(
  experiment_name = "Feature Selection Experiment",
  include_dgp_example = FALSE,
  include_method_example = FALSE
)

use_inference_template(
  experiment_name = "Inference Experiment",
  include_dgp_example = FALSE,
  include_method_example = FALSE
)

Arguments

experiment_name

Name of the experiment.

type

Either "regression" or "classification" specifying the type of prediction problem.

support

Logical. If TRUE, include code to evaluate the estimated feature support (see the example call below).

include_dgp_example

Logical. If TRUE, include a completed DGP example, rather than a fill-in-the-blank template.

include_method_example

Logical. If TRUE, include a completed Method example, rather than a fill-in-the-blank template.
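
For example, a minimal call (output omitted here) requesting the regression template along with code to evaluate the estimated feature support:

use_prediction_template(type = "regression", support = TRUE)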

Value

Invisibly returns NULL; as a side effect, the template code is printed to the console.
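
Because the template is printed rather than returned, one convenient pattern (a sketch, assuming the template is written to standard output) is to capture the printed lines and save them as a new script:

# Capture the printed template and write it to a starting script.
# Assumes the template is emitted on standard output, so base R's
# capture.output() can collect it.
template_code <- capture.output(
  use_prediction_template(type = "regression")
)
writeLines(template_code, "prediction-experiment.R")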

Examples

# prediction templates
use_prediction_template(type = "regression")
#> dgp <- create_dgp(
#>   .dgp_fun = stop('Add DGP function here.'),
#>   .name = stop('Add name of DGP here.'),
#>   stop('Add additional arguments (if necessary) to pass to DGP here.')
#> ) 
#> 
#> method <- create_method(
#>   .method_fun = stop('Add Method function here.'),
#>   .name = stop('Add name of Method here.'),
#>   stop('Add additional arguments (if necessary) to pass to Method here.')
#> ) 
#> 
#> nested_pred_cols <- stop('(Optional) Add name of column in `fit_results` with prediction result columns to be unnested.')
#> true_pred_col <- stop('Add name of column in `fit_results` with true responses here.')
#> est_pred_col <- stop('Add name of column in `fit_results` with the predicted responses here.')
#> 
#> 
#> pred_err <- create_evaluator(
#>   .eval_fun = summarize_pred_err,
#>   .name = 'Prediction Accuracy',
#>   nested_cols = nested_pred_cols,
#>   truth_col = true_pred_col,
#>   estimate_col = est_pred_col
#> ) 
#> 
#> pred_err_plot <- create_visualizer(
#>   .viz_fun = plot_pred_err,
#>   .name = 'Prediction Accuracy Plot',
#>   eval_name = 'Prediction Accuracy'
#> ) 
#> 
#> experiment <- create_experiment(name = 'Prediction Experiment') %>% 
#>   add_dgp(dgp) %>% 
#>   add_method(method) %>% 
#>   add_evaluator(pred_err) %>% 
#>   add_visualizer(pred_err_plot) 
#> 
#> init_docs(experiment)  # fill out documentation before proceeding!
#> 
#> results <- run_experiment(
#>   experiment = experiment,
#>   n_reps = stop('Add number of replicates here.'),
#>   save = TRUE
#> ) 
#> 
#> render_docs(experiment)
#> 
use_prediction_template(type = "classification")
#> dgp <- create_dgp(
#>   .dgp_fun = stop('Add DGP function here.'),
#>   .name = stop('Add name of DGP here.'),
#>   stop('Add additional arguments (if necessary) to pass to DGP here.')
#> ) 
#> 
#> method <- create_method(
#>   .method_fun = stop('Add Method function here.'),
#>   .name = stop('Add name of Method here.'),
#>   stop('Add additional arguments (if necessary) to pass to Method here.')
#> ) 
#> 
#> nested_pred_cols <- stop('(Optional) Add name of column in `fit_results` with prediction result columns to be unnested.')
#> true_pred_col <- stop('Add name of column in `fit_results` with true responses here.')
#> est_pred_col <- stop('Add name of column in `fit_results` with the predicted responses here.')
#> prob_pred_cols <- stop('Add name of column(s) in `fit_results` with the predicted probabilities here.')
#> 
#> 
#> pred_err <- create_evaluator(
#>   .eval_fun = summarize_pred_err,
#>   .name = 'Prediction Accuracy',
#>   nested_cols = nested_pred_cols,
#>   truth_col = true_pred_col,
#>   estimate_col = est_pred_col,
#>   prob_cols = prob_pred_cols
#> ) 
#> 
#> pred_err_plot <- create_visualizer(
#>   .viz_fun = plot_pred_err,
#>   .name = 'Prediction Accuracy Plot',
#>   eval_name = 'Prediction Accuracy'
#> ) 
#> 
#> roc_plot <- create_visualizer(
#>   .viz_fun = plot_pred_curve,
#>   .name = 'ROC Plot',
#>   curve = 'ROC',
#>   eval_fun_options = list(
#>     nested_cols = nested_pred_cols,
#>     truth_col = true_pred_col,
#>     prob_cols = prob_pred_cols
#>   )
#> ) 
#> 
#> pr_plot <- create_visualizer(
#>   .viz_fun = plot_pred_curve,
#>   .name = 'PR Plot',
#>   curve = 'PR',
#>   eval_fun_options = list(
#>     nested_cols = nested_pred_cols,
#>     truth_col = true_pred_col,
#>     prob_cols = prob_pred_cols
#>   )
#> ) 
#> 
#> experiment <- create_experiment(name = 'Prediction Experiment') %>% 
#>   add_dgp(dgp) %>% 
#>   add_method(method) %>% 
#>   add_evaluator(pred_err) %>% 
#>   add_visualizer(pred_err_plot) %>% 
#>   add_visualizer(roc_plot) %>% 
#>   add_visualizer(pr_plot) 
#> 
#> init_docs(experiment)  # fill out documentation before proceeding!
#> 
#> results <- run_experiment(
#>   experiment = experiment,
#>   n_reps = stop('Add number of replicates here.'),
#>   save = TRUE
#> ) 
#> 
#> render_docs(experiment)
#> 

# prediction templates with example DGP and Method
use_prediction_template(type = "regression", 
                        include_dgp_example = TRUE,
                        include_method_example = TRUE)
#> if (!require('dplyr')) install.packages('dplyr')
#> if (!require('purrr')) install.packages('purrr')
#> if (!require('ranger')) install.packages('ranger')
#>  
#> 
#> gaussian_linear_dgp <- function (n, p, beta = 1, err_sd = 1, data_split = TRUE, train_prop = 0.5, 
#>     return_support = TRUE) 
#> {
#>     X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
#>     beta_vec <- matrix(beta, ncol = 1, nrow = p)
#>     y <- X %*% beta_vec + rnorm(n = n, sd = err_sd)
#>     if (data_split) {
#>         train_ids <- sample(1:n, size = round(n * train_prop))
#>         Xtest <- X[-train_ids, , drop = FALSE]
#>         ytest <- y[-train_ids]
#>         X <- X[train_ids, , drop = FALSE]
#>         y <- y[train_ids]
#>     }
#>     else {
#>         Xtest <- NULL
#>         ytest <- NULL
#>     }
#>     if (return_support) {
#>         support <- which(beta_vec != 0)
#>         out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest, 
#>             support = support)
#>     }
#>     else {
#>         out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest)
#>     }
#>     return(out)
#> } 
#> 
#> dgp <- create_dgp(
#>   .dgp_fun = gaussian_linear_dgp,
#>   .name = 'Example DGP (Uncorrelated Gaussian Linear DGP)',
#>   n = 200,
#>   p = 10,
#>   beta = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
#>   err_sd = 1,
#>   data_split = TRUE,
#>   train_prop = 0.5,
#>   return_support = TRUE
#> ) 
#> 
#> rf_method <- function (X, y, Xtest, ytest, support, ...) 
#> {
#>     data <- as.data.frame(X) %>% cbind(.y = y)
#>     if (is.factor(y)) {
#>         mtry <- round(sqrt(ncol(X)))
#>     }
#>     else {
#>         mtry <- round(ncol(X)/3)
#>     }
#>     fit <- ranger::ranger(data = data, dependent.variable.name = ".y", 
#>         importance = "impurity", mtry = mtry, num.threads = 1, 
#>         ...)
#>     preds <- stats::predict(fit, as.data.frame(Xtest))$predictions
#>     if (is.factor(y)) {
#>         k <- nlevels(y)
#>         prob_preds <- stats::predict(fit, as.data.frame(Xtest), 
#>             predict.all = TRUE, num.threads = 1)$predictions
#>         prob_preds <- purrr::list_rbind(purrr::map(1:nrow(prob_preds), 
#>             function(i) {
#>                 x <- factor(prob_preds[i, ], levels = 1:k)
#>                 tibble::as_tibble_row(c(prop.table(table(x))))
#>             })) %>% stats::setNames(levels(y)) %>% dplyr::select(-1)
#>     }
#>     else {
#>         prob_preds <- NULL
#>     }
#>     p <- ncol(X)
#>     if (is.null(colnames(X))) {
#>         features <- 1:p
#>     }
#>     else {
#>         features <- colnames(X)
#>     }
#>     out <- list(y = ytest, predictions = preds, prob_predictions = prob_preds, 
#>         support_df = data.frame(feature = features, true_support = 1:p %in% 
#>             support, imp = fit$variable.importance, selected = fit$variable.importance > 
#>             mean(fit$variable.importance)))
#>     return(out)
#> } 
#> 
#> method <- create_method(
#>   .method_fun = rf_method,
#>   .name = 'RF'
#> ) 
#> 
#> nested_pred_cols <- c('y', 'predictions', 'prob_predictions')  # prediction results columns to be unnested
#> true_pred_col <- 'y'  # true response column
#> est_pred_col <- 'predictions'  # predicted response column
#> 
#> 
#> pred_err <- create_evaluator(
#>   .eval_fun = summarize_pred_err,
#>   .name = 'Prediction Accuracy',
#>   nested_cols = nested_pred_cols,
#>   truth_col = true_pred_col,
#>   estimate_col = est_pred_col
#> ) 
#> 
#> pred_err_plot <- create_visualizer(
#>   .viz_fun = plot_pred_err,
#>   .name = 'Prediction Accuracy Plot',
#>   eval_name = 'Prediction Accuracy'
#> ) 
#> 
#> experiment <- create_experiment(name = 'Prediction Experiment') %>% 
#>   add_dgp(dgp) %>% 
#>   add_method(method) %>% 
#>   add_evaluator(pred_err) %>% 
#>   add_visualizer(pred_err_plot) 
#> 
#> init_docs(experiment)  # fill out documentation before proceeding!
#> 
#> results <- run_experiment(
#>   experiment = experiment,
#>   n_reps = stop('Add number of replicates here.'),
#>   save = TRUE
#> ) 
#> 
#> render_docs(experiment)
#> 
use_prediction_template(type = "classification", 
                        include_dgp_example = TRUE,
                        include_method_example = TRUE)
#> if (!require('dplyr')) install.packages('dplyr')
#> if (!require('purrr')) install.packages('purrr')
#> if (!require('ranger')) install.packages('ranger')
#>  
#> 
#> gaussian_logistic_dgp <- function (n, p, beta = 1, data_split = TRUE, train_prop = 0.5, 
#>     return_support = TRUE) 
#> {
#>     X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
#>     beta_vec <- matrix(beta, ncol = 1, nrow = p)
#>     probs <- 1/(1 + exp(-(X %*% beta_vec)))
#>     y <- as.factor(ifelse(stats::runif(n = n, min = 0, max = 1) > 
#>         probs, "0", "1"))
#>     if (data_split) {
#>         train_ids <- sample(1:n, size = round(n * train_prop))
#>         Xtest <- X[-train_ids, , drop = FALSE]
#>         ytest <- y[-train_ids]
#>         X <- X[train_ids, , drop = FALSE]
#>         y <- y[train_ids]
#>     }
#>     else {
#>         Xtest <- NULL
#>         ytest <- NULL
#>     }
#>     if (return_support) {
#>         support <- which(beta_vec != 0)
#>         out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest, 
#>             support = support)
#>     }
#>     else {
#>         out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest)
#>     }
#>     return(out)
#> } 
#> 
#> dgp <- create_dgp(
#>   .dgp_fun = gaussian_logistic_dgp,
#>   .name = 'Example DGP (Uncorrelated Gaussian Logistic DGP)',
#>   n = 200,
#>   p = 10,
#>   beta = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
#>   data_split = TRUE,
#>   train_prop = 0.5,
#>   return_support = TRUE
#> ) 
#> 
#> rf_method <- function (X, y, Xtest, ytest, support, ...) 
#> {
#>     data <- as.data.frame(X) %>% cbind(.y = y)
#>     if (is.factor(y)) {
#>         mtry <- round(sqrt(ncol(X)))
#>     }
#>     else {
#>         mtry <- round(ncol(X)/3)
#>     }
#>     fit <- ranger::ranger(data = data, dependent.variable.name = ".y", 
#>         importance = "impurity", mtry = mtry, num.threads = 1, 
#>         ...)
#>     preds <- stats::predict(fit, as.data.frame(Xtest))$predictions
#>     if (is.factor(y)) {
#>         k <- nlevels(y)
#>         prob_preds <- stats::predict(fit, as.data.frame(Xtest), 
#>             predict.all = TRUE, num.threads = 1)$predictions
#>         prob_preds <- purrr::list_rbind(purrr::map(1:nrow(prob_preds), 
#>             function(i) {
#>                 x <- factor(prob_preds[i, ], levels = 1:k)
#>                 tibble::as_tibble_row(c(prop.table(table(x))))
#>             })) %>% stats::setNames(levels(y)) %>% dplyr::select(-1)
#>     }
#>     else {
#>         prob_preds <- NULL
#>     }
#>     p <- ncol(X)
#>     if (is.null(colnames(X))) {
#>         features <- 1:p
#>     }
#>     else {
#>         features <- colnames(X)
#>     }
#>     out <- list(y = ytest, predictions = preds, prob_predictions = prob_preds, 
#>         support_df = data.frame(feature = features, true_support = 1:p %in% 
#>             support, imp = fit$variable.importance, selected = fit$variable.importance > 
#>             mean(fit$variable.importance)))
#>     return(out)
#> } 
#> 
#> method <- create_method(
#>   .method_fun = rf_method,
#>   .name = 'RF'
#> ) 
#> 
#> nested_pred_cols <- c('y', 'predictions', 'prob_predictions')  # prediction results columns to be unnested
#> true_pred_col <- 'y'  # true response column
#> est_pred_col <- 'predictions'  # predicted response column
#> prob_pred_cols <- '1'  # predicted probability columns
#> 
#> 
#> pred_err <- create_evaluator(
#>   .eval_fun = summarize_pred_err,
#>   .name = 'Prediction Accuracy',
#>   nested_cols = nested_pred_cols,
#>   truth_col = true_pred_col,
#>   estimate_col = est_pred_col,
#>   prob_cols = prob_pred_cols
#> ) 
#> 
#> pred_err_plot <- create_visualizer(
#>   .viz_fun = plot_pred_err,
#>   .name = 'Prediction Accuracy Plot',
#>   eval_name = 'Prediction Accuracy'
#> ) 
#> 
#> roc_plot <- create_visualizer(
#>   .viz_fun = plot_pred_curve,
#>   .name = 'ROC Plot',
#>   curve = 'ROC',
#>   eval_fun_options = list(
#>     nested_cols = nested_pred_cols,
#>     truth_col = true_pred_col,
#>     prob_cols = prob_pred_cols
#>   )
#> ) 
#> 
#> pr_plot <- create_visualizer(
#>   .viz_fun = plot_pred_curve,
#>   .name = 'PR Plot',
#>   curve = 'PR',
#>   eval_fun_options = list(
#>     nested_cols = nested_pred_cols,
#>     truth_col = true_pred_col,
#>     prob_cols = prob_pred_cols
#>   )
#> ) 
#> 
#> experiment <- create_experiment(name = 'Prediction Experiment') %>% 
#>   add_dgp(dgp) %>% 
#>   add_method(method) %>% 
#>   add_evaluator(pred_err) %>% 
#>   add_visualizer(pred_err_plot) %>% 
#>   add_visualizer(roc_plot) %>% 
#>   add_visualizer(pr_plot) 
#> 
#> init_docs(experiment)  # fill out documentation before proceeding!
#> 
#> results <- run_experiment(
#>   experiment = experiment,
#>   n_reps = stop('Add number of replicates here.'),
#>   save = TRUE
#> ) 
#> 
#> render_docs(experiment)
#> 

# feature selection template
use_feature_selection_template()
#> dgp <- create_dgp(
#>   .dgp_fun = stop('Add DGP function here.'),
#>   .name = stop('Add name of DGP here.'),
#>   stop('Add additional arguments (if necessary) to pass to DGP here.')
#> ) 
#> 
#> method <- create_method(
#>   .method_fun = stop('Add Method function here.'),
#>   .name = stop('Add name of Method here.'),
#>   stop('Add additional arguments (if necessary) to pass to Method here.')
#> ) 
#> 
#> nested_feature_cols <- stop('(Optional) Add name of column in `fit_results` with feature importance columns to be unnested here.')
#> feature_col <- stop('Add name of column in `fit_results` containing the feature names here.')
#> true_feature_col <- stop('Add name of column in `fit_results` containing the true feature support here.')
#> feature_imp_col <- stop('Add name of column in `fit_results` containing the feature importances here.')
#> feature_sel_col <- stop('(Optional) Add name of column in `fit_results` containing the (estimated) selected features here.')
#> 
#> fi <- create_evaluator(
#>   .eval_fun = summarize_feature_importance,
#>   .name = 'Feature Importances',
#>   nested_cols = nested_feature_cols,
#>   feature_col = feature_col,
#>   imp_col = feature_imp_col
#> ) 
#> 
#> feature_sel <- create_evaluator(
#>   .eval_fun = summarize_feature_selection_err,
#>   .name = 'Feature Selection Error',
#>   nested_cols = nested_feature_cols,
#>   truth_col = true_feature_col,
#>   estimate_col = feature_sel_col,
#>   imp_col = feature_imp_col
#> ) 
#> 
#> fi_plot <- create_visualizer(
#>   .viz_fun = plot_feature_importance,
#>   .name = 'Feature Importances Plot',
#>   eval_name = 'Feature Importances',
#>   feature_col = feature_col
#> ) 
#> 
#> feature_sel_plot <- create_visualizer(
#>   .viz_fun = plot_feature_selection_err,
#>   .name = 'Feature Selection Error Plot',
#>   eval_name = 'Feature Selection Error'
#> ) 
#> 
#> experiment <- create_experiment(name = 'Feature Selection Experiment') %>% 
#>   add_dgp(dgp) %>% 
#>   add_method(method) %>% 
#>   add_evaluator(fi) %>% 
#>   add_evaluator(feature_sel) %>% 
#>   add_visualizer(fi_plot) %>% 
#>   add_visualizer(feature_sel_plot) 
#> 
#> init_docs(experiment)  # fill out documentation before proceeding!
#> 
#> results <- run_experiment(
#>   experiment = experiment,
#>   n_reps = stop('Add number of replicates here.'),
#>   save = TRUE
#> ) 
#> 
#> render_docs(experiment)
#> 
# feature selection template with example DGP and Method
use_feature_selection_template(include_dgp_example = TRUE, 
                               include_method_example = TRUE)
#> if (!require('dplyr')) install.packages('dplyr')
#> if (!require('purrr')) install.packages('purrr')
#> if (!require('ranger')) install.packages('ranger')
#>  
#> 
#> gaussian_linear_dgp <- function (n, p, beta = 1, err_sd = 1, data_split = TRUE, train_prop = 0.5, 
#>     return_support = TRUE) 
#> {
#>     X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
#>     beta_vec <- matrix(beta, ncol = 1, nrow = p)
#>     y <- X %*% beta_vec + rnorm(n = n, sd = err_sd)
#>     if (data_split) {
#>         train_ids <- sample(1:n, size = round(n * train_prop))
#>         Xtest <- X[-train_ids, , drop = FALSE]
#>         ytest <- y[-train_ids]
#>         X <- X[train_ids, , drop = FALSE]
#>         y <- y[train_ids]
#>     }
#>     else {
#>         Xtest <- NULL
#>         ytest <- NULL
#>     }
#>     if (return_support) {
#>         support <- which(beta_vec != 0)
#>         out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest, 
#>             support = support)
#>     }
#>     else {
#>         out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest)
#>     }
#>     return(out)
#> } 
#> 
#> dgp <- create_dgp(
#>   .dgp_fun = gaussian_linear_dgp,
#>   .name = 'Example DGP (Uncorrelated Gaussian Linear DGP)',
#>   n = 200,
#>   p = 10,
#>   beta = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
#>   err_sd = 1,
#>   data_split = TRUE,
#>   train_prop = 0.5,
#>   return_support = TRUE
#> ) 
#> 
#> rf_method <- function (X, y, Xtest, ytest, support, ...) 
#> {
#>     data <- as.data.frame(X) %>% cbind(.y = y)
#>     if (is.factor(y)) {
#>         mtry <- round(sqrt(ncol(X)))
#>     }
#>     else {
#>         mtry <- round(ncol(X)/3)
#>     }
#>     fit <- ranger::ranger(data = data, dependent.variable.name = ".y", 
#>         importance = "impurity", mtry = mtry, num.threads = 1, 
#>         ...)
#>     preds <- stats::predict(fit, as.data.frame(Xtest))$predictions
#>     if (is.factor(y)) {
#>         k <- nlevels(y)
#>         prob_preds <- stats::predict(fit, as.data.frame(Xtest), 
#>             predict.all = TRUE, num.threads = 1)$predictions
#>         prob_preds <- purrr::list_rbind(purrr::map(1:nrow(prob_preds), 
#>             function(i) {
#>                 x <- factor(prob_preds[i, ], levels = 1:k)
#>                 tibble::as_tibble_row(c(prop.table(table(x))))
#>             })) %>% stats::setNames(levels(y)) %>% dplyr::select(-1)
#>     }
#>     else {
#>         prob_preds <- NULL
#>     }
#>     p <- ncol(X)
#>     if (is.null(colnames(X))) {
#>         features <- 1:p
#>     }
#>     else {
#>         features <- colnames(X)
#>     }
#>     out <- list(y = ytest, predictions = preds, prob_predictions = prob_preds, 
#>         support_df = data.frame(feature = features, true_support = 1:p %in% 
#>             support, imp = fit$variable.importance, selected = fit$variable.importance > 
#>             mean(fit$variable.importance)))
#>     return(out)
#> } 
#> 
#> method <- create_method(
#>   .method_fun = rf_method,
#>   .name = 'RF'
#> ) 
#> 
#> nested_feature_cols <- 'support_df'  # feature importance columns to be unnested
#> feature_col <- 'feature'  # feature names column
#> true_feature_col <- 'true_support'  # true feature support column
#> feature_imp_col <- 'imp'  # feature importance column
#> feature_sel_col <- 'selected'  # estimated feature support column
#> 
#> fi <- create_evaluator(
#>   .eval_fun = summarize_feature_importance,
#>   .name = 'Feature Importances',
#>   nested_cols = nested_feature_cols,
#>   feature_col = feature_col,
#>   imp_col = feature_imp_col
#> ) 
#> 
#> feature_sel <- create_evaluator(
#>   .eval_fun = summarize_feature_selection_err,
#>   .name = 'Feature Selection Error',
#>   nested_cols = nested_feature_cols,
#>   truth_col = true_feature_col,
#>   estimate_col = feature_sel_col,
#>   imp_col = feature_imp_col
#> ) 
#> 
#> fi_plot <- create_visualizer(
#>   .viz_fun = plot_feature_importance,
#>   .name = 'Feature Importances Plot',
#>   eval_name = 'Feature Importances',
#>   feature_col = feature_col
#> ) 
#> 
#> feature_sel_plot <- create_visualizer(
#>   .viz_fun = plot_feature_selection_err,
#>   .name = 'Feature Selection Error Plot',
#>   eval_name = 'Feature Selection Error'
#> ) 
#> 
#> experiment <- create_experiment(name = 'Feature Selection Experiment') %>% 
#>   add_dgp(dgp) %>% 
#>   add_method(method) %>% 
#>   add_evaluator(fi) %>% 
#>   add_evaluator(feature_sel) %>% 
#>   add_visualizer(fi_plot) %>% 
#>   add_visualizer(feature_sel_plot) 
#> 
#> init_docs(experiment)  # fill out documentation before proceeding!
#> 
#> results <- run_experiment(
#>   experiment = experiment,
#>   n_reps = stop('Add number of replicates here.'),
#>   save = TRUE
#> ) 
#> 
#> render_docs(experiment)
#> 

# inference template
use_inference_template()
#> dgp <- create_dgp(
#>   .dgp_fun = stop('Add DGP function here.'),
#>   .name = stop('Add name of DGP here.'),
#>   stop('Add additional arguments (if necessary) to pass to DGP here.')
#> ) 
#> 
#> method <- create_method(
#>   .method_fun = stop('Add Method function here.'),
#>   .name = stop('Add name of Method here.'),
#>   stop('Add additional arguments (if necessary) to pass to Method here.')
#> ) 
#> 
#> nested_feature_cols <- stop('(Optional) Add name of column in `fit_results` with feature importance columns to be unnested here.')
#> feature_col <- stop('Add name of column in `fit_results` containing the feature names here.')
#> true_feature_col <- stop('Add name of column in `fit_results` containing the true feature support here.')
#> pval_col <- stop('Add name of column in `fit_results` containing the p-values here.')
#> 
#> inf_err <- create_evaluator(
#>   .eval_fun = summarize_testing_err,
#>   .name = 'Hypothesis Testing Error',
#>   nested_cols = nested_feature_cols,
#>   truth_col = true_feature_col,
#>   pval_col = pval_col
#> ) 
#> 
#> fi_pval <- create_evaluator(
#>   .eval_fun = summarize_feature_importance,
#>   .name = 'P-value Summary Statistics',
#>   eval_id = 'pval',
#>   nested_cols = nested_feature_cols,
#>   feature_col = feature_col,
#>   imp_col = pval_col
#> ) 
#> 
#> inf_err_plot <- create_visualizer(
#>   .viz_fun = plot_testing_err,
#>   .name = 'Hypothesis Testing Error Plot',
#>   eval_name = 'Hypothesis Testing Error'
#> ) 
#> 
#> inf_roc_plot <- create_visualizer(
#>   .viz_fun = plot_testing_curve,
#>   .name = 'Feature ROC Plot',
#>   curve = 'ROC',
#>   eval_fun_options = list(
#>     nested_cols = nested_feature_cols,
#>     truth_col = true_feature_col,
#>     pval_col = pval_col
#>   )
#> ) 
#> 
#> inf_pr_plot <- create_visualizer(
#>   .viz_fun = plot_testing_curve,
#>   .name = 'Feature Selection PR Plot',
#>   curve = 'PR',
#>   eval_fun_options = list(
#>     nested_cols = nested_feature_cols,
#>     truth_col = true_feature_col,
#>     pval_col = pval_col
#>   )
#> ) 
#> 
#> reject_prob_plot <- create_visualizer(
#>   .viz_fun = plot_reject_prob,
#>   .name = 'Rejection Probability Curve',
#>   feature_col = feature_col,
#>   eval_fun_options = list(
#>     nested_cols = nested_feature_cols,
#>     pval_col = pval_col
#>   )
#> ) 
#> 
#> experiment <- create_experiment(name = 'Inference Experiment') %>% 
#>   add_dgp(dgp) %>% 
#>   add_method(method) %>% 
#>   add_evaluator(inf_err) %>% 
#>   add_evaluator(fi_pval) %>% 
#>   add_visualizer(inf_err_plot) %>% 
#>   add_visualizer(inf_roc_plot) %>% 
#>   add_visualizer(inf_pr_plot) %>% 
#>   add_visualizer(reject_prob_plot) 
#> 
#> init_docs(experiment)  # fill out documentation before proceeding!
#> 
#> results <- run_experiment(
#>   experiment = experiment,
#>   n_reps = stop('Add number of replicates here.'),
#>   save = TRUE
#> ) 
#> 
#> render_docs(experiment)
#> 
# inference template with example DGP and Method
use_inference_template(include_dgp_example = TRUE,
                       include_method_example = TRUE)
#> if (!require('broom')) install.packages('broom')
#>  
#> 
#> gaussian_linear_dgp <- function (n, p, beta = 1, err_sd = 1, data_split = TRUE, train_prop = 0.5, 
#>     return_support = TRUE) 
#> {
#>     X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
#>     beta_vec <- matrix(beta, ncol = 1, nrow = p)
#>     y <- X %*% beta_vec + rnorm(n = n, sd = err_sd)
#>     if (data_split) {
#>         train_ids <- sample(1:n, size = round(n * train_prop))
#>         Xtest <- X[-train_ids, , drop = FALSE]
#>         ytest <- y[-train_ids]
#>         X <- X[train_ids, , drop = FALSE]
#>         y <- y[train_ids]
#>     }
#>     else {
#>         Xtest <- NULL
#>         ytest <- NULL
#>     }
#>     if (return_support) {
#>         support <- which(beta_vec != 0)
#>         out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest, 
#>             support = support)
#>     }
#>     else {
#>         out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest)
#>     }
#>     return(out)
#> } 
#> 
#> dgp <- create_dgp(
#>   .dgp_fun = gaussian_linear_dgp,
#>   .name = 'Example DGP (Uncorrelated Gaussian Linear DGP)',
#>   n = 200,
#>   p = 10,
#>   beta = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
#>   err_sd = 1,
#>   data_split = FALSE,
#>   return_support = TRUE
#> ) 
#> 
#> ols_method <- function (X, y, support, ...) 
#> {
#>     data <- as.data.frame(X) %>% cbind(.y = y)
#>     if (is.factor(y)) {
#>         stop("OLS cannot be applied to a factor response.")
#>     }
#>     fit <- stats::lm(.y ~ ., data = data)
#>     p <- ncol(X)
#>     if (is.null(colnames(X))) {
#>         features <- 1:p
#>     }
#>     else {
#>         features <- colnames(X)
#>     }
#>     out <- list(support_df = data.frame(feature = features, true_support = 1:p %in% 
#>         support, pval = broom::tidy(fit)$p.value[-1]))
#>     return(out)
#> } 
#> 
#> method <- create_method(
#>   .method_fun = ols_method,
#>   .name = 'OLS'
#> ) 
#> 
#> nested_feature_cols <- 'support_df'  # feature importance columns to be unnested
#> feature_col <- 'feature'  # feature names column
#> true_feature_col <- 'true_support'  # true feature support column
#> pval_col <- 'pval'  # p-values column
#> 
#> inf_err <- create_evaluator(
#>   .eval_fun = summarize_testing_err,
#>   .name = 'Hypothesis Testing Error',
#>   nested_cols = nested_feature_cols,
#>   truth_col = true_feature_col,
#>   pval_col = pval_col
#> ) 
#> 
#> fi_pval <- create_evaluator(
#>   .eval_fun = summarize_feature_importance,
#>   .name = 'P-value Summary Statistics',
#>   eval_id = 'pval',
#>   nested_cols = nested_feature_cols,
#>   feature_col = feature_col,
#>   imp_col = pval_col
#> ) 
#> 
#> inf_err_plot <- create_visualizer(
#>   .viz_fun = plot_testing_err,
#>   .name = 'Hypothesis Testing Error Plot',
#>   eval_name = 'Hypothesis Testing Error'
#> ) 
#> 
#> inf_roc_plot <- create_visualizer(
#>   .viz_fun = plot_testing_curve,
#>   .name = 'Feature ROC Plot',
#>   curve = 'ROC',
#>   eval_fun_options = list(
#>     nested_cols = nested_feature_cols,
#>     truth_col = true_feature_col,
#>     pval_col = pval_col
#>   )
#> ) 
#> 
#> inf_pr_plot <- create_visualizer(
#>   .viz_fun = plot_testing_curve,
#>   .name = 'Feature Selection PR Plot',
#>   curve = 'PR',
#>   eval_fun_options = list(
#>     nested_cols = nested_feature_cols,
#>     truth_col = true_feature_col,
#>     pval_col = pval_col
#>   )
#> ) 
#> 
#> reject_prob_plot <- create_visualizer(
#>   .viz_fun = plot_reject_prob,
#>   .name = 'Rejection Probability Curve',
#>   feature_col = feature_col,
#>   eval_fun_options = list(
#>     nested_cols = nested_feature_cols,
#>     pval_col = pval_col
#>   )
#> ) 
#> 
#> experiment <- create_experiment(name = 'Inference Experiment') %>% 
#>   add_dgp(dgp) %>% 
#>   add_method(method) %>% 
#>   add_evaluator(inf_err) %>% 
#>   add_evaluator(fi_pval) %>% 
#>   add_visualizer(inf_err_plot) %>% 
#>   add_visualizer(inf_roc_plot) %>% 
#>   add_visualizer(inf_pr_plot) %>% 
#>   add_visualizer(reject_prob_plot) 
#> 
#> init_docs(experiment)  # fill out documentation before proceeding!
#> 
#> results <- run_experiment(
#>   experiment = experiment,
#>   n_reps = stop('Add number of replicates here.'),
#>   save = TRUE
#> ) 
#> 
#> render_docs(experiment)
#>