Functions to create boilerplate code for specific types of experiments.
Source: R/use_templates.R
These functions suggest boilerplate code for a few common types of experiments (namely, prediction, feature selection, and inference). Each prints minimal starter code to the console, which can be copied into a script and filled in.
Usage
use_prediction_template(
  experiment_name = "Prediction Experiment",
  type = c("regression", "classification"),
  support = FALSE,
  include_dgp_example = FALSE,
  include_method_example = FALSE
)

use_feature_selection_template(
  experiment_name = "Feature Selection Experiment",
  include_dgp_example = FALSE,
  include_method_example = FALSE
)

use_inference_template(
  experiment_name = "Inference Experiment",
  include_dgp_example = FALSE,
  include_method_example = FALSE
)
Arguments
- experiment_name
Name of the experiment.
- type
Either "regression" or "classification", specifying the type of prediction problem.
- support
Logical. If TRUE, include code to evaluate the estimated feature support.
- include_dgp_example
Logical. If TRUE, include a completed DGP example, rather than a fill-in-the-blank template.
- include_method_example
Logical. If TRUE, include a completed Method example, rather than a fill-in-the-blank template.
Examples
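# note (a sketch, not part of the package output): since each template is
# printed to the console, it can be captured and saved as a starting script,
# assuming the template is written to standard output
template_code <- utils::capture.output(use_prediction_template(type = "regression"))
writeLines(template_code, "prediction_experiment.R")
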
# prediction templates
use_prediction_template(type = "regression")
#> dgp <- create_dgp(
#> .dgp_fun = stop('Add DGP function here.'),
#> .name = stop('Add name of DGP here.'),
#> stop('Add additional arguments (if necessary) to pass to DGP here.')
#> )
#>
#> method <- create_method(
#> .method_fun = stop('Add Method function here.'),
#> .name = stop('Add name of Method here.'),
#> stop('Add additional arguments (if necessary) to pass to Method here.')
#> )
#>
#> nested_pred_cols <- stop('(Optional) Add name of column in `fit_results` with prediction result columns to be unnested.')
#> true_pred_col <- stop('Add name of column in `fit_results` with true responses here.')
#> est_pred_col <- stop('Add name of column in `fit_results` with the predicted responses here.')
#>
#>
#> pred_err <- create_evaluator(
#> .eval_fun = summarize_pred_err,
#> .name = 'Prediction Accuracy',
#> nested_cols = nested_pred_cols,
#> truth_col = true_pred_col,
#> estimate_col = est_pred_col
#> )
#>
#> pred_err_plot <- create_visualizer(
#> .viz_fun = plot_pred_err,
#> .name = 'Prediction Accuracy Plot',
#> eval_name = 'Prediction Accuracy'
#> )
#>
#> experiment <- create_experiment(name = 'Prediction Experiment') %>%
#> add_dgp(dgp) %>%
#> add_method(method) %>%
#> add_evaluator(pred_err) %>%
#> add_visualizer(pred_err_plot)
#>
#> init_docs(experiment) # fill out documentation before proceeding!
#>
#> results <- run_experiment(
#> experiment = experiment,
#> n_reps = stop('Add number of replicates here.'),
#> save = TRUE
#> )
#>
#> render_docs(experiment)
#>
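# for orientation (a hypothetical sketch, not package output): the stop()
# placeholders above would be replaced with a DGP function that returns a
# named list of data, plus any arguments to pass along, e.g.
toy_dgp <- function(n = 100) {
  x <- stats::rnorm(n)
  y <- 2 * x + stats::rnorm(n)
  list(x = x, y = y)
}
dgp <- create_dgp(.dgp_fun = toy_dgp, .name = "Toy Linear DGP", n = 100)
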
use_prediction_template(type = "classification")
#> dgp <- create_dgp(
#> .dgp_fun = stop('Add DGP function here.'),
#> .name = stop('Add name of DGP here.'),
#> stop('Add additional arguments (if necessary) to pass to DGP here.')
#> )
#>
#> method <- create_method(
#> .method_fun = stop('Add Method function here.'),
#> .name = stop('Add name of Method here.'),
#> stop('Add additional arguments (if necessary) to pass to Method here.')
#> )
#>
#> nested_pred_cols <- stop('(Optional) Add name of column in `fit_results` with prediction result columns to be unnested.')
#> true_pred_col <- stop('Add name of column in `fit_results` with true responses here.')
#> est_pred_col <- stop('Add name of column in `fit_results` with the predicted responses here.')
#> prob_pred_cols <- stop('Add name of column(s) in `fit_results` with the predicted probabilities here.')
#>
#>
#> pred_err <- create_evaluator(
#> .eval_fun = summarize_pred_err,
#> .name = 'Prediction Accuracy',
#> nested_cols = nested_pred_cols,
#> truth_col = true_pred_col,
#> estimate_col = est_pred_col,
#> prob_cols = prob_pred_cols
#> )
#>
#> pred_err_plot <- create_visualizer(
#> .viz_fun = plot_pred_err,
#> .name = 'Prediction Accuracy Plot',
#> eval_name = 'Prediction Accuracy'
#> )
#>
#> roc_plot <- create_visualizer(
#> .viz_fun = plot_pred_curve,
#> .name = 'ROC Plot',
#> curve = 'ROC',
#> eval_fun_options = list(
#> nested_cols = nested_pred_cols,
#> truth_col = true_pred_col,
#> prob_cols = prob_pred_cols
#> )
#> )
#>
#> pr_plot <- create_visualizer(
#> .viz_fun = plot_pred_curve,
#> .name = 'PR Plot',
#> curve = 'PR',
#> eval_fun_options = list(
#> nested_cols = nested_pred_cols,
#> truth_col = true_pred_col,
#> prob_cols = prob_pred_cols
#> )
#> )
#>
#> experiment <- create_experiment(name = 'Prediction Experiment') %>%
#> add_dgp(dgp) %>%
#> add_method(method) %>%
#> add_evaluator(pred_err) %>%
#> add_visualizer(pred_err_plot) %>%
#> add_visualizer(roc_plot) %>%
#> add_visualizer(pr_plot)
#>
#> init_docs(experiment) # fill out documentation before proceeding!
#>
#> results <- run_experiment(
#> experiment = experiment,
#> n_reps = stop('Add number of replicates here.'),
#> save = TRUE
#> )
#>
#> render_docs(experiment)
#>
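# likewise (a hypothetical sketch, not package output): a Method function
# receives the DGP output as named arguments and returns a named list of
# results, e.g.
toy_method <- function(x, y, ...) {
  fit <- stats::lm(y ~ x)
  list(y = y, predictions = stats::predict(fit))
}
method <- create_method(.method_fun = toy_method, .name = "Toy OLS")
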
# prediction templates with example DGP and Method
use_prediction_template(type = "regression",
include_dgp_example = TRUE,
include_method_example = TRUE)
#> if (!require('dplyr')) install.packages('dplyr')
#> if (!require('purrr')) install.packages('purrr')
#> if (!require('ranger')) install.packages('ranger')
#>
#>
#> gaussian_linear_dgp <- function (n, p, beta = 1, err_sd = 1, data_split = TRUE, train_prop = 0.5,
#> return_support = TRUE)
#> {
#> X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
#> beta_vec <- matrix(beta, ncol = 1, nrow = p)
#> y <- X %*% beta_vec + rnorm(n = n, sd = err_sd)
#> if (data_split) {
#> train_ids <- sample(1:n, size = round(n * train_prop))
#> Xtest <- X[-train_ids, , drop = FALSE]
#> ytest <- y[-train_ids]
#> X <- X[train_ids, , drop = FALSE]
#> y <- y[train_ids]
#> }
#> else {
#> Xtest <- NULL
#> ytest <- NULL
#> }
#> if (return_support) {
#> support <- which(beta_vec != 0)
#> out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest,
#> support = support)
#> }
#> else {
#> out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest)
#> }
#> return(out)
#> }
#>
#> dgp <- create_dgp(
#> .dgp_fun = gaussian_linear_dgp,
#> .name = 'Example DGP (Uncorrelated Gaussian Linear DGP)',
#> n = 200,
#> p = 10,
#> beta = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
#> err_sd = 1,
#> data_split = TRUE,
#> train_prop = 0.5,
#> return_support = TRUE
#> )
#>
#> rf_method <- function (X, y, Xtest, ytest, support, ...)
#> {
#> data <- as.data.frame(X) %>% cbind(.y = y)
#> if (is.factor(y)) {
#> mtry <- round(sqrt(ncol(X)))
#> }
#> else {
#> mtry <- round(ncol(X)/3)
#> }
#> fit <- ranger::ranger(data = data, dependent.variable.name = ".y",
#> importance = "impurity", mtry = mtry, num.threads = 1,
#> ...)
#> preds <- stats::predict(fit, as.data.frame(Xtest))$predictions
#> if (is.factor(y)) {
#> k <- nlevels(y)
#> prob_preds <- stats::predict(fit, as.data.frame(Xtest),
#> predict.all = TRUE, num.threads = 1)$predictions
#> prob_preds <- purrr::list_rbind(purrr::map(1:nrow(prob_preds),
#> function(i) {
#> x <- factor(prob_preds[i, ], levels = 1:k)
#> tibble::as_tibble_row(c(prop.table(table(x))))
#> })) %>% stats::setNames(levels(y)) %>% dplyr::select(-1)
#> }
#> else {
#> prob_preds <- NULL
#> }
#> p <- ncol(X)
#> if (is.null(colnames(X))) {
#> features <- 1:p
#> }
#> else {
#> features <- colnames(X)
#> }
#> out <- list(y = ytest, predictions = preds, prob_predictions = prob_preds,
#> support_df = data.frame(feature = features, true_support = 1:p %in%
#> support, imp = fit$variable.importance, selected = fit$variable.importance >
#> mean(fit$variable.importance)))
#> return(out)
#> }
#>
#> method <- create_method(
#> .method_fun = rf_method,
#> .name = 'RF'
#> )
#>
#> nested_pred_cols <- c('y', 'predictions', 'prob_predictions') # prediction results columns to be unnested
#> true_pred_col <- 'y' # true response column
#> est_pred_col <- 'predictions' # predicted response column
#>
#>
#> pred_err <- create_evaluator(
#> .eval_fun = summarize_pred_err,
#> .name = 'Prediction Accuracy',
#> nested_cols = nested_pred_cols,
#> truth_col = true_pred_col,
#> estimate_col = est_pred_col
#> )
#>
#> pred_err_plot <- create_visualizer(
#> .viz_fun = plot_pred_err,
#> .name = 'Prediction Accuracy Plot',
#> eval_name = 'Prediction Accuracy'
#> )
#>
#> experiment <- create_experiment(name = 'Prediction Experiment') %>%
#> add_dgp(dgp) %>%
#> add_method(method) %>%
#> add_evaluator(pred_err) %>%
#> add_visualizer(pred_err_plot)
#>
#> init_docs(experiment) # fill out documentation before proceeding!
#>
#> results <- run_experiment(
#> experiment = experiment,
#> n_reps = stop('Add number of replicates here.'),
#> save = TRUE
#> )
#>
#> render_docs(experiment)
#>
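# an aside (an assumption: simChef executes replicates with the future
# framework, so setting a future plan beforehand parallelizes run_experiment())
if (!require('future')) install.packages('future')
future::plan(future::multisession, workers = 2)
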
use_prediction_template(type = "classification",
include_dgp_example = TRUE,
include_method_example = TRUE)
#> if (!require('dplyr')) install.packages('dplyr')
#> if (!require('purrr')) install.packages('purrr')
#> if (!require('ranger')) install.packages('ranger')
#>
#>
#> gaussian_logistic_dgp <- function (n, p, beta = 1, data_split = TRUE, train_prop = 0.5,
#> return_support = TRUE)
#> {
#> X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
#> beta_vec <- matrix(beta, ncol = 1, nrow = p)
#> probs <- 1/(1 + exp(-(X %*% beta_vec)))
#> y <- as.factor(ifelse(stats::runif(n = n, min = 0, max = 1) >
#> probs, "0", "1"))
#> if (data_split) {
#> train_ids <- sample(1:n, size = round(n * train_prop))
#> Xtest <- X[-train_ids, , drop = FALSE]
#> ytest <- y[-train_ids]
#> X <- X[train_ids, , drop = FALSE]
#> y <- y[train_ids]
#> }
#> else {
#> Xtest <- NULL
#> ytest <- NULL
#> }
#> if (return_support) {
#> support <- which(beta_vec != 0)
#> out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest,
#> support = support)
#> }
#> else {
#> out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest)
#> }
#> return(out)
#> }
#>
#> dgp <- create_dgp(
#> .dgp_fun = gaussian_logistic_dgp,
#> .name = 'Example DGP (Uncorrelated Gaussian Logistic DGP)',
#> n = 200,
#> p = 10,
#> beta = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
#> data_split = TRUE,
#> train_prop = 0.5,
#> return_support = TRUE
#> )
#>
#> rf_method <- function (X, y, Xtest, ytest, support, ...)
#> {
#> data <- as.data.frame(X) %>% cbind(.y = y)
#> if (is.factor(y)) {
#> mtry <- round(sqrt(ncol(X)))
#> }
#> else {
#> mtry <- round(ncol(X)/3)
#> }
#> fit <- ranger::ranger(data = data, dependent.variable.name = ".y",
#> importance = "impurity", mtry = mtry, num.threads = 1,
#> ...)
#> preds <- stats::predict(fit, as.data.frame(Xtest))$predictions
#> if (is.factor(y)) {
#> k <- nlevels(y)
#> prob_preds <- stats::predict(fit, as.data.frame(Xtest),
#> predict.all = TRUE, num.threads = 1)$predictions
#> prob_preds <- purrr::list_rbind(purrr::map(1:nrow(prob_preds),
#> function(i) {
#> x <- factor(prob_preds[i, ], levels = 1:k)
#> tibble::as_tibble_row(c(prop.table(table(x))))
#> })) %>% stats::setNames(levels(y)) %>% dplyr::select(-1)
#> }
#> else {
#> prob_preds <- NULL
#> }
#> p <- ncol(X)
#> if (is.null(colnames(X))) {
#> features <- 1:p
#> }
#> else {
#> features <- colnames(X)
#> }
#> out <- list(y = ytest, predictions = preds, prob_predictions = prob_preds,
#> support_df = data.frame(feature = features, true_support = 1:p %in%
#> support, imp = fit$variable.importance, selected = fit$variable.importance >
#> mean(fit$variable.importance)))
#> return(out)
#> }
#>
#> method <- create_method(
#> .method_fun = rf_method,
#> .name = 'RF'
#> )
#>
#> nested_pred_cols <- c('y', 'predictions', 'prob_predictions') # prediction results columns to be unnested
#> true_pred_col <- 'y' # true response column
#> est_pred_col <- 'predictions' # predicted response column
#> prob_pred_cols <- '1' # predicted probability columns
#>
#>
#> pred_err <- create_evaluator(
#> .eval_fun = summarize_pred_err,
#> .name = 'Prediction Accuracy',
#> nested_cols = nested_pred_cols,
#> truth_col = true_pred_col,
#> estimate_col = est_pred_col,
#> prob_cols = prob_pred_cols
#> )
#>
#> pred_err_plot <- create_visualizer(
#> .viz_fun = plot_pred_err,
#> .name = 'Prediction Accuracy Plot',
#> eval_name = 'Prediction Accuracy'
#> )
#>
#> roc_plot <- create_visualizer(
#> .viz_fun = plot_pred_curve,
#> .name = 'ROC Plot',
#> curve = 'ROC',
#> eval_fun_options = list(
#> nested_cols = nested_pred_cols,
#> truth_col = true_pred_col,
#> prob_cols = prob_pred_cols
#> )
#> )
#>
#> pr_plot <- create_visualizer(
#> .viz_fun = plot_pred_curve,
#> .name = 'PR Plot',
#> curve = 'PR',
#> eval_fun_options = list(
#> nested_cols = nested_pred_cols,
#> truth_col = true_pred_col,
#> prob_cols = prob_pred_cols
#> )
#> )
#>
#> experiment <- create_experiment(name = 'Prediction Experiment') %>%
#> add_dgp(dgp) %>%
#> add_method(method) %>%
#> add_evaluator(pred_err) %>%
#> add_visualizer(pred_err_plot) %>%
#> add_visualizer(roc_plot) %>%
#> add_visualizer(pr_plot)
#>
#> init_docs(experiment) # fill out documentation before proceeding!
#>
#> results <- run_experiment(
#> experiment = experiment,
#> n_reps = stop('Add number of replicates here.'),
#> save = TRUE
#> )
#>
#> render_docs(experiment)
#>
# feature selection template
use_feature_selection_template()
#> dgp <- create_dgp(
#> .dgp_fun = stop('Add DGP function here.'),
#> .name = stop('Add name of DGP here.'),
#> stop('Add additional arguments (if necessary) to pass to DGP here.')
#> )
#>
#> method <- create_method(
#> .method_fun = stop('Add Method function here.'),
#> .name = stop('Add name of Method here.'),
#> stop('Add additional arguments (if necessary) to pass to Method here.')
#> )
#>
#> nested_feature_cols <- stop('(Optional) Add name of column in `fit_results` with feature importance columns to be unnested here.')
#> feature_col <- stop('Add name of column in `fit_results` containing the feature names here.')
#> true_feature_col <- stop('Add name of column in `fit_results` containing the true feature support here.')
#> feature_imp_col <- stop('Add name of column in `fit_results` containing the feature importances here.')
#> feature_sel_col <- stop('(Optional) Add name of column in `fit_results` containing the (estimated) selected features here.')
#>
#> fi <- create_evaluator(
#> .eval_fun = summarize_feature_importance,
#> .name = 'Feature Importances',
#> nested_cols = nested_feature_cols,
#> feature_col = feature_col,
#> imp_col = feature_imp_col
#> )
#>
#> feature_sel <- create_evaluator(
#> .eval_fun = summarize_feature_selection_err,
#> .name = 'Feature Selection Error',
#> nested_cols = nested_feature_cols,
#> truth_col = true_feature_col,
#> estimate_col = feature_sel_col,
#> imp_col = feature_imp_col
#> )
#>
#> fi_plot <- create_visualizer(
#> .viz_fun = plot_feature_importance,
#> .name = 'Feature Importances Plot',
#> eval_name = 'Feature Importances',
#> feature_col = feature_col
#> )
#>
#> feature_sel_plot <- create_visualizer(
#> .viz_fun = plot_feature_selection_err,
#> .name = 'Feature Selection Error Plot',
#> eval_name = 'Feature Selection Error'
#> )
#>
#> experiment <- create_experiment(name = 'Feature Selection Experiment') %>%
#> add_dgp(dgp) %>%
#> add_method(method) %>%
#> add_evaluator(fi) %>%
#> add_evaluator(feature_sel) %>%
#> add_visualizer(fi_plot) %>%
#> add_visualizer(feature_sel_plot)
#>
#> init_docs(experiment) # fill out documentation before proceeding!
#>
#> results <- run_experiment(
#> experiment = experiment,
#> n_reps = stop('Add number of replicates here.'),
#> save = TRUE
#> )
#>
#> render_docs(experiment)
#>
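# for orientation (a hypothetical sketch, not package output): a feature
# selection Method that returns the columns the template above asks for,
# using absolute lasso coefficients as importances
if (!require('glmnet')) install.packages('glmnet')
lasso_method <- function(X, y, support, ...) {
  fit <- glmnet::cv.glmnet(X, y)
  coefs <- as.numeric(as.matrix(stats::coef(fit, s = "lambda.min")))[-1]
  p <- ncol(X)
  features <- if (is.null(colnames(X))) 1:p else colnames(X)
  list(support_df = data.frame(
    feature = features,
    true_support = 1:p %in% support,
    imp = abs(coefs),
    selected = coefs != 0
  ))
}
method <- create_method(.method_fun = lasso_method, .name = "Lasso")
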
# feature selection template with example DGP and Method
use_feature_selection_template(include_dgp_example = TRUE,
include_method_example = TRUE)
#> if (!require('dplyr')) install.packages('dplyr')
#> if (!require('purrr')) install.packages('purrr')
#> if (!require('ranger')) install.packages('ranger')
#>
#>
#> gaussian_linear_dgp <- function (n, p, beta = 1, err_sd = 1, data_split = TRUE, train_prop = 0.5,
#> return_support = TRUE)
#> {
#> X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
#> beta_vec <- matrix(beta, ncol = 1, nrow = p)
#> y <- X %*% beta_vec + rnorm(n = n, sd = err_sd)
#> if (data_split) {
#> train_ids <- sample(1:n, size = round(n * train_prop))
#> Xtest <- X[-train_ids, , drop = FALSE]
#> ytest <- y[-train_ids]
#> X <- X[train_ids, , drop = FALSE]
#> y <- y[train_ids]
#> }
#> else {
#> Xtest <- NULL
#> ytest <- NULL
#> }
#> if (return_support) {
#> support <- which(beta_vec != 0)
#> out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest,
#> support = support)
#> }
#> else {
#> out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest)
#> }
#> return(out)
#> }
#>
#> dgp <- create_dgp(
#> .dgp_fun = gaussian_linear_dgp,
#> .name = 'Example DGP (Uncorrelated Gaussian Linear DGP)',
#> n = 200,
#> p = 10,
#> beta = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
#> err_sd = 1,
#> data_split = TRUE,
#> train_prop = 0.5,
#> return_support = TRUE
#> )
#>
#> rf_method <- function (X, y, Xtest, ytest, support, ...)
#> {
#> data <- as.data.frame(X) %>% cbind(.y = y)
#> if (is.factor(y)) {
#> mtry <- round(sqrt(ncol(X)))
#> }
#> else {
#> mtry <- round(ncol(X)/3)
#> }
#> fit <- ranger::ranger(data = data, dependent.variable.name = ".y",
#> importance = "impurity", mtry = mtry, num.threads = 1,
#> ...)
#> preds <- stats::predict(fit, as.data.frame(Xtest))$predictions
#> if (is.factor(y)) {
#> k <- nlevels(y)
#> prob_preds <- stats::predict(fit, as.data.frame(Xtest),
#> predict.all = TRUE, num.threads = 1)$predictions
#> prob_preds <- purrr::list_rbind(purrr::map(1:nrow(prob_preds),
#> function(i) {
#> x <- factor(prob_preds[i, ], levels = 1:k)
#> tibble::as_tibble_row(c(prop.table(table(x))))
#> })) %>% stats::setNames(levels(y)) %>% dplyr::select(-1)
#> }
#> else {
#> prob_preds <- NULL
#> }
#> p <- ncol(X)
#> if (is.null(colnames(X))) {
#> features <- 1:p
#> }
#> else {
#> features <- colnames(X)
#> }
#> out <- list(y = ytest, predictions = preds, prob_predictions = prob_preds,
#> support_df = data.frame(feature = features, true_support = 1:p %in%
#> support, imp = fit$variable.importance, selected = fit$variable.importance >
#> mean(fit$variable.importance)))
#> return(out)
#> }
#>
#> method <- create_method(
#> .method_fun = rf_method,
#> .name = 'RF'
#> )
#>
#> nested_feature_cols <- 'support_df' # feature importance columns to be unnested
#> feature_col <- 'feature' # feature names column
#> true_feature_col <- 'true_support' # true feature support column
#> feature_imp_col <- 'imp' # feature importance column
#> feature_sel_col <- 'selected' # estimated feature support column
#>
#> fi <- create_evaluator(
#> .eval_fun = summarize_feature_importance,
#> .name = 'Feature Importances',
#> nested_cols = nested_feature_cols,
#> feature_col = feature_col,
#> imp_col = feature_imp_col
#> )
#>
#> feature_sel <- create_evaluator(
#> .eval_fun = summarize_feature_selection_err,
#> .name = 'Feature Selection Error',
#> nested_cols = nested_feature_cols,
#> truth_col = true_feature_col,
#> estimate_col = feature_sel_col,
#> imp_col = feature_imp_col
#> )
#>
#> fi_plot <- create_visualizer(
#> .viz_fun = plot_feature_importance,
#> .name = 'Feature Importances Plot',
#> eval_name = 'Feature Importances',
#> feature_col = feature_col
#> )
#>
#> feature_sel_plot <- create_visualizer(
#> .viz_fun = plot_feature_selection_err,
#> .name = 'Feature Selection Error Plot',
#> eval_name = 'Feature Selection Error'
#> )
#>
#> experiment <- create_experiment(name = 'Feature Selection Experiment') %>%
#> add_dgp(dgp) %>%
#> add_method(method) %>%
#> add_evaluator(fi) %>%
#> add_evaluator(feature_sel) %>%
#> add_visualizer(fi_plot) %>%
#> add_visualizer(feature_sel_plot)
#>
#> init_docs(experiment) # fill out documentation before proceeding!
#>
#> results <- run_experiment(
#> experiment = experiment,
#> n_reps = stop('Add number of replicates here.'),
#> save = TRUE
#> )
#>
#> render_docs(experiment)
#>
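# once a filled-in template has been run, inspect the returned object
# (a sketch; str() makes no assumption about its exact structure)
if (exists("results")) str(results, max.level = 1)
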
# inference template
use_inference_template()
#> dgp <- create_dgp(
#> .dgp_fun = stop('Add DGP function here.'),
#> .name = stop('Add name of DGP here.'),
#> stop('Add additional arguments (if necessary) to pass to DGP here.')
#> )
#>
#> method <- create_method(
#> .method_fun = stop('Add Method function here.'),
#> .name = stop('Add name of Method here.'),
#> stop('Add additional arguments (if necessary) to pass to Method here.')
#> )
#>
#> nested_feature_cols <- stop('(Optional) Add name of column in `fit_results` with feature importance columns to be unnested here.')
#> feature_col <- stop('Add name of column in `fit_results` containing the feature names here.')
#> true_feature_col <- stop('Add name of column in `fit_results` containing the true feature support here.')
#> pval_col <- stop('Add name of column in `fit_results` containing the p-values here.')
#>
#> inf_err <- create_evaluator(
#> .eval_fun = summarize_testing_err,
#> .name = 'Hypothesis Testing Error',
#> nested_cols = nested_feature_cols,
#> truth_col = true_feature_col,
#> pval_col = pval_col
#> )
#>
#> fi_pval <- create_evaluator(
#> .eval_fun = summarize_feature_importance,
#> .name = 'P-value Summary Statistics',
#> eval_id = 'pval',
#> nested_cols = nested_feature_cols,
#> feature_col = feature_col,
#> imp_col = pval_col
#> )
#>
#> inf_err_plot <- create_visualizer(
#> .viz_fun = plot_testing_err,
#> .name = 'Hypothesis Testing Error Plot',
#> eval_name = 'Hypothesis Testing Error'
#> )
#>
#> inf_roc_plot <- create_visualizer(
#> .viz_fun = plot_testing_curve,
#> .name = 'Feature ROC Plot',
#> curve = 'ROC',
#> eval_fun_options = list(
#> nested_cols = nested_feature_cols,
#> truth_col = true_feature_col,
#> pval_col = pval_col
#> )
#> )
#>
#> inf_pr_plot <- create_visualizer(
#> .viz_fun = plot_testing_curve,
#> .name = 'Feature Selection PR Plot',
#> curve = 'PR',
#> eval_fun_options = list(
#> nested_cols = nested_feature_cols,
#> truth_col = true_feature_col,
#> pval_col = pval_col
#> )
#> )
#>
#> reject_prob_plot <- create_visualizer(
#> .viz_fun = plot_reject_prob,
#> .name = 'Rejection Probability Curve',
#> feature_col = feature_col,
#> eval_fun_options = list(
#> nested_cols = nested_feature_cols,
#> pval_col = pval_col
#> )
#> )
#>
#> experiment <- create_experiment(name = 'Inference Experiment') %>%
#> add_dgp(dgp) %>%
#> add_method(method) %>%
#> add_evaluator(inf_err) %>%
#> add_evaluator(fi_pval) %>%
#> add_visualizer(inf_err_plot) %>%
#> add_visualizer(inf_roc_plot) %>%
#> add_visualizer(inf_pr_plot) %>%
#> add_visualizer(reject_prob_plot)
#>
#> init_docs(experiment) # fill out documentation before proceeding!
#>
#> results <- run_experiment(
#> experiment = experiment,
#> n_reps = stop('Add number of replicates here.'),
#> save = TRUE
#> )
#>
#> render_docs(experiment)
#>
# inference template with example DGP and Method
use_inference_template(include_dgp_example = TRUE,
include_method_example = TRUE)
#> if (!require('broom')) install.packages('broom')
#>
#>
#> gaussian_linear_dgp <- function (n, p, beta = 1, err_sd = 1, data_split = TRUE, train_prop = 0.5,
#> return_support = TRUE)
#> {
#> X <- matrix(stats::rnorm(n * p), nrow = n, ncol = p)
#> beta_vec <- matrix(beta, ncol = 1, nrow = p)
#> y <- X %*% beta_vec + rnorm(n = n, sd = err_sd)
#> if (data_split) {
#> train_ids <- sample(1:n, size = round(n * train_prop))
#> Xtest <- X[-train_ids, , drop = FALSE]
#> ytest <- y[-train_ids]
#> X <- X[train_ids, , drop = FALSE]
#> y <- y[train_ids]
#> }
#> else {
#> Xtest <- NULL
#> ytest <- NULL
#> }
#> if (return_support) {
#> support <- which(beta_vec != 0)
#> out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest,
#> support = support)
#> }
#> else {
#> out <- list(X = X, y = y, Xtest = Xtest, ytest = ytest)
#> }
#> return(out)
#> }
#>
#> dgp <- create_dgp(
#> .dgp_fun = gaussian_linear_dgp,
#> .name = 'Example DGP (Uncorrelated Gaussian Linear DGP)',
#> n = 200,
#> p = 10,
#> beta = c(1, 1, 1, 1, 1, 0, 0, 0, 0, 0),
#> err_sd = 1,
#> data_split = FALSE,
#> return_support = TRUE
#> )
#>
#> ols_method <- function (X, y, support, ...)
#> {
#> data <- as.data.frame(X) %>% cbind(.y = y)
#> if (is.factor(y)) {
#> stop("OLS cannot be applied to a factor response.")
#> }
#> fit <- stats::lm(.y ~ ., data = data)
#> p <- ncol(X)
#> if (is.null(colnames(X))) {
#> features <- 1:p
#> }
#> else {
#> features <- colnames(X)
#> }
#> out <- list(support_df = data.frame(feature = features, true_support = 1:p %in%
#> support, pval = broom::tidy(fit)$p.value[-1]))
#> return(out)
#> }
#>
#> method <- create_method(
#> .method_fun = ols_method,
#> .name = 'OLS'
#> )
#>
#> nested_feature_cols <- 'support_df' # feature importance columns to be unnested
#> feature_col <- 'feature' # feature names column
#> true_feature_col <- 'true_support' # true feature support column
#> pval_col <- 'pval' # p-values column
#>
#> inf_err <- create_evaluator(
#> .eval_fun = summarize_testing_err,
#> .name = 'Hypothesis Testing Error',
#> nested_cols = nested_feature_cols,
#> truth_col = true_feature_col,
#> pval_col = pval_col
#> )
#>
#> fi_pval <- create_evaluator(
#> .eval_fun = summarize_feature_importance,
#> .name = 'P-value Summary Statistics',
#> eval_id = 'pval',
#> nested_cols = nested_feature_cols,
#> feature_col = feature_col,
#> imp_col = pval_col
#> )
#>
#> inf_err_plot <- create_visualizer(
#> .viz_fun = plot_testing_err,
#> .name = 'Hypothesis Testing Error Plot',
#> eval_name = 'Hypothesis Testing Error'
#> )
#>
#> inf_roc_plot <- create_visualizer(
#> .viz_fun = plot_testing_curve,
#> .name = 'Feature ROC Plot',
#> curve = 'ROC',
#> eval_fun_options = list(
#> nested_cols = nested_feature_cols,
#> truth_col = true_feature_col,
#> pval_col = pval_col
#> )
#> )
#>
#> inf_pr_plot <- create_visualizer(
#> .viz_fun = plot_testing_curve,
#> .name = 'Feature Selection PR Plot',
#> curve = 'PR',
#> eval_fun_options = list(
#> nested_cols = nested_feature_cols,
#> truth_col = true_feature_col,
#> pval_col = pval_col
#> )
#> )
#>
#> reject_prob_plot <- create_visualizer(
#> .viz_fun = plot_reject_prob,
#> .name = 'Rejection Probability Curve',
#> feature_col = feature_col,
#> eval_fun_options = list(
#> nested_cols = nested_feature_cols,
#> pval_col = pval_col
#> )
#> )
#>
#> experiment <- create_experiment(name = 'Inference Experiment') %>%
#> add_dgp(dgp) %>%
#> add_method(method) %>%
#> add_evaluator(inf_err) %>%
#> add_evaluator(fi_pval) %>%
#> add_visualizer(inf_err_plot) %>%
#> add_visualizer(inf_roc_plot) %>%
#> add_visualizer(inf_pr_plot) %>%
#> add_visualizer(reject_prob_plot)
#>
#> init_docs(experiment) # fill out documentation before proceeding!
#>
#> results <- run_experiment(
#> experiment = experiment,
#> n_reps = stop('Add number of replicates here.'),
#> save = TRUE
#> )
#>
#> render_docs(experiment)
#>
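# a possible extension (an assumption: simChef's add_vary_across() varies a
# DGP or Method parameter across an experiment), applied to a filled-in
# experiment object:
# experiment <- experiment %>%
#   add_vary_across(.dgp = "Example DGP (Uncorrelated Gaussian Linear DGP)",
#                   n = c(100, 200, 400))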