3

我正在尝试在 caret 中为 bartMachine 构建一个良好的用法示例,但我似乎无法在 caret 中正确地建立 bartMachine 模型。谁能告诉我,下面这些主要错误到底是什么意思?或者是否有用于 BART 建模的简单可重现代码?

这是我使用 HouseVotes84 和 cars 数据集作为虚拟数据、对 bartMachine 进行建模的代码片段:

library(mlbench)
library(caret)

data("HouseVotes84")
# HouseVotes84 is the classification dataset; `cars` is the regression dataset.
# Recode every factor predictor (all columns except the first one, which is
# kept as the `Class` response) to its numeric level codes. Base R is used so
# the snippet does not depend on dplyr/magrittr, which are never loaded here.
dummy_data_classif <- HouseVotes84[, -1]
is_factor_col <- vapply(dummy_data_classif, is.factor, logical(1))
dummy_data_classif[is_factor_col] <- lapply(
  dummy_data_classif[is_factor_col],
  as.numeric
)
dummy_data_classif <- data.frame(
  Class = HouseVotes84[, 1],
  dummy_data_classif
)
# Replace every remaining NA with 0 so the models see no missing values.
dummy_data_classif[is.na(dummy_data_classif)] <- 0

# NOTE(review): presumably this resolves to caret's `cars` data rather than
# datasets::cars, since the formula used later references `Price` -- confirm.
data("cars")
dummy_data_regr <- cars

#' Fit a caret model under a time limit, reporting problems as messages
#'
#' @param dummy_data Data passed to `caret::train()` as `data`.
#' @param formula Model formula, or a string coercible with `as.formula()`.
#' @param resample_plan Resampling scheme, one of:
#'   1 = repeated CV (10 folds, 5 repeats), 2 = 5-fold CV,
#'   3 = adaptive CV (10 folds, 5 repeats), 4 = bootstrap (5 resamples),
#'   5 = "boot_all" (5 resamples).
#' @param test_method Forwarded to the `method` argument of `caret::train()`.
#' @param time_limit Timeout in seconds applied to the `train()` call.
#' @param grid_param Optional tuning grid; when empty, `tuneGrid = NULL` is
#'   passed so `train()` builds its default grid.
#' @param parallel_mode Forwarded to `trainControl(allowParallel = )`.
#' @return The fitted `train` object, or `NULL` when training fails, times
#'   out, or `resample_plan` is not supported. Warnings raised during training
#'   are reported via `message()` and do not abort the fit.
caret_method_tester <- function(dummy_data, formula, resample_plan = 1,
                                test_method, time_limit = 30,
                                grid_param = c(), parallel_mode = FALSE) {
  # Error reporter: prints the failure and yields NULL as the result.
  report_failure <- function(cond) {
    message("Test Model Failed")
    message("Here's the original error message:")
    message(conditionMessage(cond))
    NULL
  }
  # Warning reporter: prints the warning, then resumes training. A calling
  # handler is used because an exiting (tryCatch) handler would abandon the
  # fit before any model object exists.
  report_warning <- function(cond) {
    message("Warning Triggered!")
    message("Here's the original warning message:")
    message(conditionMessage(cond))
    invokeRestart("muffleWarning")
  }

  # Map the plan number to trainControl() arguments; an unknown plan gives NULL.
  valid_plan <- length(resample_plan) == 1L && !is.na(resample_plan)
  plan_key <- if (valid_plan) as.character(resample_plan) else ""
  control_args <- switch(
    plan_key,
    "1" = list(method = "repeatedcv", number = 10, repeats = 5),
    "2" = list(method = "cv", number = 5),
    "3" = list(
      method = "adaptive_cv", number = 10, repeats = 5,
      adaptive = list(min = 3, alpha = 0.05, method = "BT", complete = FALSE)
    ),
    "4" = list(method = "boot", number = 5),
    "5" = list(method = "boot_all", number = 5)
  )
  if (is.null(control_args)) {
    return(report_failure(simpleError(paste0(
      "resample_plan must be one of 1, 2, 3, 4 or 5; got: ",
      paste(format(resample_plan), collapse = ", ")
    ))))
  }

  library(caret)
  library(R.utils)
  formula <- as.formula(formula)
  resampling <- do.call(
    caret::trainControl,
    c(control_args, list(allowParallel = parallel_mode))
  )
  tune_grid <- if (length(grid_param) > 0) grid_param else NULL

  # NOTE(review): withTimeout presumably cannot interrupt long-running
  # native/Java code (e.g. inside bartMachine) -- confirm in R.utils docs.
  tryCatch(
    withCallingHandlers(
      R.utils::withTimeout(
        caret::train(
          formula,
          data = dummy_data,
          method = test_method,
          trControl = resampling,
          tuneGrid = tune_grid
        ),
        timeout = time_limit
      ),
      warning = report_warning
    ),
    error = report_failure
  )
}

# Regression check: fit bartMachine on dummy_data_regr using resample_plan 2
# (5-fold cross-validation) with `Price` as the response.
bart_reg <- caret_method_tester(dummy_data_regr, "Price ~ .", 
                test_method="bartMachine", time_limit=30, resample_plan=2)

Test Model Failed
Here's the original error message:
argument is of length zero

# Classification check: fit bartMachine on dummy_data_classif using
# resample_plan 2 (5-fold cross-validation) with `Class` as the response.
bart_classif <- caret_method_tester(dummy_data_classif, "Class ~ .", 
                test_method="bartMachine", time_limit=30, resample_plan=2)

Test Model Failed
Here's the original error message:
incorrect number of dimensions

我使用 tryCatch 来方便地报告代码的执行情况,这样代码失败、发出警告或运行成功时都一目了然。

就我而言,数据集也没有任何 NA 值

4

1 回答 1

5

如果您把代码精简到最基本的部分会更好:本质上,是 train 函数配合 bartMachine 方法无法正常工作。我们可以用下面这个例子来说明,它会得到同样的错误信息:

# Minimal reproduction: bartMachine via train() with no tuning grid supplied.
mdl <- train(
  mpg ~ .,
  data = mtcars,
  method = "bartMachine",
  trControl = trainControl(method = "cv")
)
Error in if (grepl("adaptive", trControl$method) & nrow(tuneGrid) == 1) { : 
  argument is of length zero

该错误源自 caret 代码中的一个 bug:如果您不提供调参网格(tuneGrid),用于创建网格的默认函数不会返回 data.frame:

getModelInfo()$bartMachine$grid
function(x, y, len = NULL, search = "grid") {
                    if(search == "grid") {
                      out <- expand.grid(num_trees = 50,
                                         k = (1:len)+ 1,
                                         alpha = seq(.9, .99, length = len),
                                         beta = seq(1, 3, length = len),
                                         nu =  (1:len)+ 1)
                    } else {
                      out <- data.frame(num_trees = sample(10:100, replace = TRUE, size = len),
                                        k = runif(len, min = 0, max = 5),
                                        alpha = runif(len, min = .9, max = 1),
                                        beta = runif(len, min = 0, max = 4),
                                        nu = runif(len, min = 0, max = 5))
                    }
                    if(is.factor(y)) {
                      out$k <- NA
                      out$nu <- NA
                    }
                  }

您可以自己提供一个调参网格(tuneGrid):

# Workaround: pass an explicit one-row tuning grid so caret never has to call
# the model's default grid function.
mdl <- train(
  mpg ~ .,
  data = mtcars,
  method = "bartMachine",
  trControl = trainControl(method = "boot"),
  tuneGrid = data.frame(
    num_trees = 50,
    k = 3,
    alpha = 0.1,
    beta = 0.1,
    nu = 4
  )
)

mdl

Bayesian Additive Regression Trees 

32 samples
10 predictors

No pre-processing
Resampling: Bootstrapped (25 reps) 
Summary of sample sizes: 32, 32, 32, 32, 32, 32, ... 
Resampling results:

  RMSE      Rsquared   MAE     
  2.826126  0.8344417  2.292464

Tuning parameter 'num_trees' was held constant at a value of 50
 'beta' was held constant at a value of 0.1
Tuning parameter 'nu' was
 held constant at a value of 4

或者,您可以修复上面的函数并据此创建一个新的自定义方法(更多内容可参阅 caret 文档中关于使用自定义模型的章节):

# Start from caret's built-in bartMachine model definition and override only
# its grid generator.
newBartMachine <- getModelInfo("bartMachine", regex = FALSE)[[1]]

# Default tuning-grid generator for bartMachine.
#   x, y   : predictors and outcome (only `is.factor(y)` is inspected here).
#   len    : number of candidate values per parameter (grid search) or number
#            of random candidates (random search).
#   search : "grid" for a full factorial grid; anything else draws randomly.
# Returns a data.frame with columns num_trees, k, alpha, beta and nu. The
# built-in version shown above ends on an `if` block and never returns `out`.
newBartMachine$grid <- function(x, y, len = NULL, search = "grid") {
  if (search == "grid") {
    out <- expand.grid(
      num_trees = 50,
      k = seq_len(len) + 1,
      alpha = seq(0.9, 0.99, length.out = len),
      beta = seq(1, 3, length.out = len),
      nu = seq_len(len) + 1
    )
  } else {
    out <- data.frame(
      num_trees = sample(10:100, replace = TRUE, size = len),
      k = runif(len, min = 0, max = 5),
      alpha = runif(len, min = 0.9, max = 1),
      beta = runif(len, min = 0, max = 4),
      nu = runif(len, min = 0, max = 5)
    )
  }
  # For a factor outcome, k and nu are blanked out (presumably because they
  # are not used for classification -- confirm against the model's fit code).
  if (is.factor(y)) {
    out$k <- NA
    out$nu <- NA
  }
  out
}

mdl <- train(
  mpg ~ .,
  data = mtcars,
  method = newBartMachine,
  trControl = trainControl(method = "cv"),
  tuneLength = 1
)

Bayesian Additive Regression Trees 

32 samples
10 predictors

No pre-processing
Resampling: Cross-Validated (10 fold) 
Summary of sample sizes: 28, 28, 28, 29, 30, 30, ... 
Resampling results:

  RMSE      Rsquared   MAE     
  2.338429  0.9581958  2.057181

Tuning parameter 'num_trees' was held constant at a value of 50
 'beta' was held constant at a value of 1
Tuning parameter 'nu' was
 held constant at a value of 2
于 2020-07-19T10:57:29.083 回答