0

我尝试运行下面这个 tidymodels 工作流程,想看看能否把两个使用不同特征、且特征中带有缺失值的模型组合(堆叠)起来。对于真实世界的数据来说,能够结合不同的数据源对同一个结果变量建模会非常方便,只是我不确定 tidymodels 的 stacks 包是否支持这样做。这个工作流程中是否有明显的问题导致堆叠失败?

library(tidyverse)
library(tidymodels)
library(workflows)
library(probably)
library(tune)
library(stacks)

# Fix the RNG seed so the sampled values below are reproducible.
set.seed(1234)

# Copy of `mtcars` with artificial missingness in two predictors:
#   * `vs`:   rows 1-10 are NA, rows 11-32 hold 22 values sampled from `vs`
#   * `disp`: rows 1-22 hold 22 values sampled from `disp`, rows 23-32 are NA
# The NA padding is written as `NA_real_` because the sampled values it is
# combined with are doubles (an integer NA would be coerced to double anyway).
mtcars_tb <- mtcars %>%
  as_tibble() %>%
  mutate(
    vs = c(rep(NA_real_, 10), sample(vs, 22)),
    disp = c(sample(disp, 22), rep(NA_real_, 10))
  )

# Split the data once into a training set and a held-out test set.
train_test_split <- initial_split(mtcars_tb)
train <- training(train_test_split)
test <- testing(train_test_split)

# Build the cross-validation folds from the training set only, so that rows
# reserved for `test` never take part in resampling, tuning or stacking.
cv_fold_mtc <- vfold_cv(train)

# Preprocessing for the model that uses both `disp` and `vs`.
#   * step_naomit(): drop rows with a missing predictor; `skip = TRUE` means
#     the step is applied when the recipe is trained but skipped when new
#     data is baked.
#   * step_normalize(): centre and scale the numeric predictors only; the
#     outcome `mpg` is excluded so it stays on its original scale.
recipe_naomit <- recipe(mpg ~ disp + vs, data = train) %>%
  step_naomit(all_predictors(), skip = TRUE) %>%
  step_normalize(all_numeric(), -all_outcomes())

# Control objects from stacks: one is passed to tune_grid(), the other to
# fit_resamples().
ctrl_grid <- control_stack_grid()
ctrl_res <- control_stack_resamples()

# Lasso specification: glmnet engine, mixture fixed at 1, penalty left to be
# tuned.
lasso_mod <- set_args(
  set_engine(linear_reg(), "glmnet"),
  penalty = tune(),
  mixture = 1
)

# Workflow pairing the two-predictor recipe with the lasso specification.
wflow <- workflow() %>%
  add_recipe(recipe_naomit) %>%
  add_model(lasso_mod)

# Tune the penalty over a grid of 10 candidates on the shared CV folds.
lasso_tune <- wflow %>%
  tune_grid(
    resamples = cv_fold_mtc,
    grid = 10,
    control = ctrl_grid
  )

# Preprocessing for the model that uses `disp` only (no `vs`).
#   * step_naomit(): drop rows with a missing predictor; `skip = TRUE` means
#     the step is applied when the recipe is trained but skipped when new
#     data is baked.
#   * step_normalize(): centre and scale the numeric predictors only; the
#     outcome `mpg` is excluded so it stays on its original scale.
recipe_rm_vs <- recipe(mpg ~ disp, data = train) %>%
  step_naomit(all_predictors(), skip = TRUE) %>%
  step_normalize(all_numeric(), -all_outcomes())

# Ordinary least-squares specification (lm engine, nothing to tune).
linear_mod <- set_engine(linear_reg(), "lm")

# Reuse the existing workflow, swapping in the `disp`-only recipe and the
# linear model.
wflow <- wflow %>%
  update_recipe(recipe_rm_vs) %>%
  update_model(linear_mod)

# Fit the linear workflow on the same CV folds used for the lasso.
linear_tune_disp <- wflow %>%
  fit_resamples(
    resamples = cv_fold_mtc,
    control = ctrl_res
  )

 # Assemble the ensemble: register both sets of resampling results as
 # candidates, blend their predictions, then fit the retained members.
 # Uses the verb names exported by released versions of stacks
 # (add_candidates / blend_predictions / fit_members) in place of the
 # pre-release names stack_add / stack_blend / stack_fit.
 model_st <- stacks() %>%
  add_candidates(lasso_tune) %>%
  add_candidates(linear_tune_disp) %>%
  blend_predictions() %>%
  fit_members()
#> Warning: Values are not uniquely identified; output will contain list-cols.
#> * Use `values_fn = list` to suppress this warning.
#> * Use `values_fn = length` to identify where the duplicates arise
#> * Use `values_fn = {summary_fun}` to summarise duplicates
#> x Fold01: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold02: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold03: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold04: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold05: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold06: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold07: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold08: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold09: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> x Fold10: formula: Error in model.frame.default(formula, data = data): invalid typ...
#> Warning: All models failed in tune_grid(). See the `.notes` column.
#> Error: All of the models failed. See the .notes column.

 model_st
#> Error in eval(expr, envir, enclos): object 'model_st' not found
4

1 回答 1

0

这(我想)是第一个关于 stacks 包的帖子!看到大家对它感兴趣,我非常兴奋。

您的代码看起来没有问题——这是 stacks(或其相关依赖包)中的一个错误。我已经在 stacks 的代码仓库中提交了一个 issue,并会尽快修复。问题解决后,我会在这里留言告知。:-)

于 2020-08-02T20:39:05.537 回答