1

我正在尝试编写一个函数,对使用了 step_ns() 的配方(recipe)做重采样拟合。出于某种原因,我收到如下错误消息:

Fold01: recipe: Error: Not all variables in the recipe are present in the supplied training set

其余所有折叠(fold)也都报同样的错误。接着出现:

警告信息: All models failed in [fit_resamples()]. See the .notes column.

这是我的代码:

# Compare a linear model fitted on the raw predictors with one fitted on a
# natural-spline expansion (step_ns) of every predictor, using resampling.
#
# data    : data frame holding the outcome column and the predictor column(s).
# outcome : unquoted name of the outcome column in `data`.
# metric  : unquoted metric function, spliced into metric_set().
# ...     : forwarded to step_ns().
#
# Returns the two fit_resamples() results row-bound together, with a `pred`
# column set to "no_splines" or "with_splines".
compare_basis_exp_to_base_mod <- function (data, outcome, metric, ...) {

  # Capture the unquoted arguments as quosures so they can be unquoted (!!)
  # inside rename() and metric_set() further down.
  outcome <- rlang::enquo(outcome)
  metric <- rlang::enquo(metric)
  
  pred_list <- colnames(data)
  
  # `outcome` was reassigned above, so substitute() no longer sees the
  # caller's expression. Presumably the deparsed quosure looks like "~name",
  # hence the first character is dropped -- TODO confirm.
  outcome_str <- substring(deparse(substitute(outcome)), 2)
  outcome_str_id <- which(colnames(data) %in% outcome_str)
  
  # NOTE(review): `predictor` is not referenced again in this function.
  predictor <- pred_list[-outcome_str_id]
  
  # Rename the outcome to a fixed name so the recipe formulas and `strata`
  # below can refer to it literally.
  data <- data %>% 
    rename(prediction = !!outcome)

  # NOTE(review): this empty tibble is overwritten by the assignment to `res`
  # near the end of the function without ever being read.
  res <- tibble(splits = list(), id = character(), .metrics = list(), 
                .notes = list(), .predictions = list(), pred = character())
  
  # Both recipes are trained (prep) on the full data set here.
  rec_without_splines <- recipe(prediction ~ ., data = data) %>%
    prep()
  
  rec_with_splines <- recipe(prediction ~ ., data = data) %>%
    step_ns(all_predictors(), ...) %>% 
    prep()
  
  # NOTE(review): the folds are built from juice(), i.e. from each recipe's
  # processed output rather than from `data`, while the same recipes are
  # passed to fit_resamples() below. For the spline recipe this looks like the
  # source of the "Not all variables in the recipe are present in the supplied
  # training set" failure reported for every fold.
  folds_without_splines <- vfold_cv(juice(rec_without_splines), strata = prediction)
  
  folds_with_splines <- vfold_cv(juice(rec_with_splines), strata = prediction)
  

  # Linear regression specification shared by both comparisons.
  mod <- linear_reg() %>% 
    set_engine("lm")

  mod_without_splines <- fit_resamples(mod,
                                       rec_without_splines,
                                       folds_without_splines,
                                       metrics = metric_set(!!metric),
                                       control = control_resamples(save_pred = TRUE)) %>%
    mutate(pred = "no_splines")
  
  mod_with_splines <- fit_resamples(mod,
                                    rec_with_splines,
                                    folds_with_splines,
                                    metrics = metric_set(!!metric),
                                    control = control_resamples(save_pred = TRUE)) %>%
    mutate(pred = "with_splines")

  # Stack the two result sets; the `pred` column tells them apart.
  res <- mod_without_splines %>%
    bind_rows(mod_with_splines)
  
  return (res)
}

基本上,参数 data 接受一个只有两列的数据表,outcome 是结果列的名称。比起这个函数本身有什么用(我只是在这里玩 tidymodels,因为我是新手),我更想了解导致此错误的原因以及如何修复它。错误出现在计算 mod_with_splines 的时候。

这里有人遇到了类似的问题,但我不知道它是否与我的问题有关。我没办法在把配方传给 fit_resamples 之前先对它执行 prep()。(至少我是这么认为的)

任何帮助,将不胜感激。谢谢。

4

1 回答 1

2

您的问题来自尝试在已通过相同配方运行的数据集上应用配方。

如果我们假设预测变量是 X1 和 X2,那么 rec_with_splines 期望的就是这些变量。但是因为 folds_with_splines 包含的是 rec_with_splines 处理后的结果,所以 folds_with_splines 实际上包含的是 X1_ns_1、X1_ns_2、X2_ns_1 和 X2_ns_2,而不是 X1 和 X2。

我建议使用工作流(workflow)把预处理和建模步骤结合起来,并将原始数据传递给 vfold_cv()。

library(tidymodels)
#' Compare a plain linear model with a natural-spline expansion by resampling
#'
#' Fits `lm` through `fit_resamples()` twice on the same cross-validation
#' folds of the raw data: once with the predictors as they are and once with
#' every predictor expanded by `step_ns()`.
#'
#' @param data A data frame containing the outcome and the predictor columns.
#' @param outcome Unquoted name of the outcome column in `data`.
#' @param metric Unquoted metric function (e.g. `rmse`) passed to
#'   `metric_set()`.
#' @param ... Further arguments forwarded to `step_ns()`.
#' @return The two `fit_resamples()` results row-bound together, with an
#'   extra `pred` column equal to `"no_splines"` or `"with_splines"`.
compare_basis_exp_to_base_mod <- function(data, outcome, metric, ...) {
  outcome <- rlang::enquo(outcome)
  metric <- rlang::enquo(metric)

  # Give the outcome a fixed name so the recipe formula and `strata` can
  # refer to it literally.
  data <- data %>%
    rename(prediction = !!outcome)

  # Both recipes stay untrained: the workflow estimates them inside each
  # resample, and add_recipe() does not accept an already prepped recipe.
  rec_without_splines <- recipe(prediction ~ ., data = data)

  rec_with_splines <- recipe(prediction ~ ., data = data) %>%
    step_ns(all_predictors(), ...)

  mod <- linear_reg() %>%
    set_engine("lm")

  # Folds are drawn once from the raw data so both candidates are evaluated
  # on identical splits and see the original predictor columns.
  data_folds <- vfold_cv(data, strata = prediction)

  # Bundle a recipe with the model, resample it, and tag the result.
  resample_recipe <- function(rec, label) {
    wf <- workflow() %>%
      add_recipe(rec) %>%
      add_model(mod)

    fit_resamples(
      wf,
      data_folds,
      metrics = metric_set(!!metric),
      control = control_resamples(save_pred = TRUE)
    ) %>%
      mutate(pred = label)
  }

  bind_rows(
    resample_recipe(rec_without_splines, "no_splines"),
    resample_recipe(rec_with_splines, "with_splines")
  )
}

# Example call: resample both workflows on mtcars, with mpg as the outcome
# and rmse as the performance metric.
res <- compare_basis_exp_to_base_mod(mtcars, mpg, rmse)
于 2020-09-03T05:37:35.813 回答