我正在用单个数值型预测变量对随机森林进行调参。
数据表结构如下:tibble [617,622 x 29] (S3: tbl_df/tbl/data.frame)。我按如下方式拆分数据:
# Fix the RNG state so the split below is reproducible.
set.seed(123)

# 70/30 split of `data`, stratified on var_class.
data_split <- initial_split(data, prop = 0.70, strata = var_class)

# The training rows are kept as returned by training(); the test rows are
# converted to a base data.frame.
data_train <- training(data_split)
data_test <- as.data.frame(testing(data_split))
我使用以下配方(recipe)对象和工作流(workflow):
# Preprocessing recipe: var_class is modelled from the single predictor var1.
rec_1v_s <-
  recipe(var_class ~ var1, data = data_train) %>%
  # Remove rows with missing values (skip = TRUE is passed explicitly).
  step_naomit(everything(), skip = TRUE) %>%
  # Normalize every numeric column selected by all_numeric().
  step_normalize(all_numeric()) %>%
  # Apply SMOTE with respect to var_class.
  step_smote(var_class)

# Random forest classifier on the ranger engine; `trees` is left as a
# tuning placeholder.
model_to_tune <-
  rand_forest(trees = tune()) %>%
  set_mode("classification") %>%
  set_engine("ranger")

# Bundle the model specification and the recipe into one workflow.
wflow_rf_1v <-
  workflow() %>%
  add_model(model_to_tune) %>%
  add_recipe(rec_1v_s)
我想调整树的数量(trees 参数)。
# Seed the RNG, then draw 40 Latin hypercube candidates for trees().
set.seed(123)
rf_grid <- grid_latin_hypercube(trees(), size = 40)

# Control options for the racing run: keep predictions, do not keep the
# workflow, and request parallel_over = "everything".
race_ctrl <- control_race(
  parallel_over = "everything",
  save_pred = TRUE,
  save_workflow = FALSE
)
然后我对随机森林的 trees 参数进行调参:
# Time the whole run: backend setup plus both tuning options.
tictoc::tic()

# Register the future-based foreach backend.
library(doFuture)
registerDoFuture()

# Leave four physical cores free, but always keep at least one worker:
# detectCores() may return NA, and on machines with four or fewer cores the
# subtraction alone would ask makeCluster() for zero or fewer workers.
all_cores <- parallel::detectCores(logical = FALSE)
n_workers <- max(1L, all_cores - 4L, na.rm = TRUE)
cl <- parallel::makeCluster(n_workers)
plan(cluster, workers = cl)

# Option 1 tune_race
rf_tune_race <- wflow_rf_1v %>%
  tune_race_win_loss(
    resamples = folds,
    grid = rf_grid,
    control = race_ctrl,
    metrics = metric_set(roc_auc, accuracy)
  )

# Option 2 tune_grid
# NOTE(review): tune_grid() documents control_grid() for `control`; race_ctrl
# is reused here so that both options run on the same specification.
rf_tune_grid <- wflow_rf_1v %>%
  tune_grid(
    resamples = folds,
    grid = rf_grid,
    control = race_ctrl,
    metrics = metric_set(roc_auc, accuracy)
  )

tictoc::toc()

# Release the worker processes and restore sequential evaluation so the
# cluster does not outlive the tuning run.
plan(sequential)
parallel::stopCluster(cl)
使用相同的设置,运行 tune_grid 不会报错,并且能得到结果;但如果运行 tune_race(anova 或 win_loss),就会收到以下错误:
Error: arrange() failed at implicit mutate() step. x Can't recycle input of size 0 to size 1.(即:arrange() 在隐式 mutate() 步骤失败,无法将大小为 0 的输入循环补齐到大小 1)
tune_race_anova 和 tune_race_win_loss 都会出现这个错误。
这条错误信息提供的线索不多,我无法判断它的来源。
为了便于排查,下面附上 rlang 给出的错误详细信息:
rlang::last_error()
<error/dplyr_error>
arrange() failed at implicit mutate() step.
x Can't recycle input of size 0 to size 1.
Backtrace:
Run `rlang::last_trace()` to see the full context.
rlang::last_trace()
<error/dplyr_error>
arrange() failed at implicit mutate() step.
x Can't recycle input of size 0 to size 1.
Backtrace:
x
1. +-`%>%`(...)
2. +-finetune::tune_race_win_loss(...)
3. +-finetune:::tune_race_win_loss.workflow(...)
4. | \-finetune:::tune_race_win_loss_workflow(...)
5. | \-`%>%`(...)
6. +-tune::tune_grid(...)
7. +-tune:::tune_grid.workflow(...)
8. | \-tune:::tune_grid_workflow(...)
9. | \-tune:::tune_grid_loop(...)
10. | \-tune:::pull_metrics(resamples, results, control)
11. | \-tune:::pulley(resamples, res, ".metrics")
12. | +-dplyr::arrange(resamples, !!!syms(id_cols))
13. | \-dplyr:::arrange.data.frame(resamples, !!!syms(id_cols))
14. | \-dplyr:::arrange_rows(.data, dots)
15. | +-base::withCallingHandlers(...)
16. | +-dplyr::transmute(new_data_frame(.data), !!!quosures)
17. | \-dplyr:::transmute.data.frame(new_data_frame(.data), !!!quosures)
18. | +-dplyr::mutate(.data, !!!dots, .keep = "none")
19. | \-dplyr:::mutate.data.frame(.data, !!!dots, .keep = "none")
20. | +-dplyr::dplyr_col_modify(.data, cols)
21. | \-dplyr:::dplyr_col_modify.data.frame(.data, cols)
22. | \-vctrs::vec_recycle_common(!!!cols, .size = nrow(data))
23. +-vctrs:::stop_recycle_incompatible_size(...)
24. | \-vctrs:::stop_vctrs(...)
25. | \-rlang::abort(message, class = c(class, "vctrs_error"), ...)
26. | \-rlang:::signal_abort(cnd)
27. | \-base::signalCondition(cnd)
28. \-(function (cnd) ...