3

这是我在堆栈溢出上发布的第一个问题。我已尽力使用 reprex r 包创建一个像样的 reprex。任何反馈表示赞赏。开始:

我相信错误位于我的代码的最底部。

我一直在尝试学习如何使用 tidymodels 套件的 r 包,在尝试使用 tune_grid 调整树的 # 和随机森林的 min_n 时遇到了错误。我有点关注这里的博客文章http://www.rebeccabarter.com/blog/2020-03-25_machine_learning/

据我了解,您可以使用工作流将配方和模型捆绑在一起,然后将其输入到 tune_grid 函数中,并通过某种重新采样(如 cv)来调整超参数。我一定是在某个地方出错了,因为 tune_grid 函数没有成功运行。

这是我的代码:

#install.packages("pacman")
#install.packages("ranger")

pacman::p_load(tidyverse, # all the things
               tidymodels, workflows, tune # tidy ml
)



dat <- ggplot2::mpg %>% 
    mutate(
        trans2 = if_else(str_detect(trans, "auto"), "auto", "manual")
        ) %>% 
    select(-trans)

head(dat)
#> # A tibble: 6 x 11
#>   manufacturer model displ  year   cyl drv     cty   hwy fl    class   trans2
#>   <chr>        <chr> <dbl> <int> <int> <chr> <int> <int> <chr> <chr>   <chr> 
#> 1 audi         a4      1.8  1999     4 f        18    29 p     compact auto  
#> 2 audi         a4      1.8  1999     4 f        21    29 p     compact manual
#> 3 audi         a4      2    2008     4 f        20    31 p     compact manual
#> 4 audi         a4      2    2008     4 f        21    30 p     compact auto  
#> 5 audi         a4      2.8  1999     6 f        16    26 p     compact auto  
#> 6 audi         a4      2.8  1999     6 f        18    26 p     compact manual

dat_split <- initial_split(dat, prop = 3/4, strata = trans2)

dat_split
#> <Training/Validation/Total>
#> <176/58/234>

dat_train <- training(dat_split)

dat_cv <- vfold_cv(dat_train, strata = trans2)

dat_cv
#> #  10-fold cross-validation using stratification 
#> # A tibble: 10 x 2
#>    splits           id    
#>  * <named list>     <chr> 
#>  1 <split [158/18]> Fold01
#>  2 <split [158/18]> Fold02
#>  3 <split [158/18]> Fold03
#>  4 <split [158/18]> Fold04
#>  5 <split [158/18]> Fold05
#>  6 <split [158/18]> Fold06
#>  7 <split [158/18]> Fold07
#>  8 <split [158/18]> Fold08
#>  9 <split [160/16]> Fold09
#> 10 <split [160/16]> Fold10

dat_recipe <- recipe(trans2 ~ ., data = dat) %>% 
    step_normalize(all_numeric()) %>% 
    step_dummy(all_nominal())

dat_recipe
#> Data Recipe
#> 
#> Inputs:
#> 
#>       role #variables
#>    outcome          1
#>  predictor         10
#> 
#> Operations:
#> 
#> Centering and scaling for all_numeric
#> Dummy variables from all_nominal

rf_model <- rand_forest() %>% 
    set_args(mtry = 4, trees = tune(), min_n = tune()) %>% 
    set_engine("ranger") %>% 
    set_mode("classification")

rf_model
#> Random Forest Model Specification (classification)
#> 
#> Main Arguments:
#>   mtry = 4
#>   trees = tune()
#>   min_n = tune()
#> 
#> Computational engine: ranger

rf_workflow <- workflow() %>% 
    add_recipe(dat_recipe) %>% 
    add_model(rf_model)

rf_workflow
#> == Workflow ===========================================================================================================================
#> Preprocessor: Recipe
#> Model: rand_forest()
#> 
#> -- Preprocessor -----------------------------------------------------------------------------------------------------------------------
#> 2 Recipe Steps
#> 
#> * step_normalize()
#> * step_dummy()
#> 
#> -- Model ------------------------------------------------------------------------------------------------------------------------------
#> Random Forest Model Specification (classification)
#> 
#> Main Arguments:
#>   mtry = 4
#>   trees = tune()
#>   min_n = tune()
#> 
#> Computational engine: ranger

rf_grid <- rf_model %>%
    parameters() %>% 
    grid_max_entropy(size = 10)

rf_grid
#> # A tibble: 10 x 2
#>    trees min_n
#>    <int> <int>
#>  1  1014    12
#>  2   737    37
#>  3   339    22
#>  4  1728     2
#>  5  1951    30
#>  6  1673    18
#>  7     9    40
#>  8   966    26
#>  9   345     5
#> 10  1440    39

rf_tune_cv <- rf_workflow %>% 
    tune_grid(resamples = dat_cv,
              grid = rf_grid,
              metrics = metric_set(accuracy, roc_auc)
    )
#> x Fold01: model  1/10: Error: A `parameters` object has required columns.
#> Missing ...
#> x Fold01: model  2/10: Error: A `parameters` object has required columns.
#> Missing ...
#> x Fold01: model  3/10: Error: A `parameters` object has required columns.
#> Missing ...
#> x Fold01: model  4/10: Error: A `parameters` object has required columns.
#> Missing ...
#> x Fold01: model  5/10: Error: A `parameters` object has required columns.
#> Missing ...
#> x Fold01: model  6/10: Error: A `parameters` object has required columns.
#> Missing ...
#> x Fold01: model  7/10: Error: A `parameters` object has required columns.
#> Missing ...
4

0 回答 0