我刚开始接触 tidymodels。在我当前的项目中,替代方案是直接使用(非 tidy 风格的)ranger 包。在测试运行中,基于 ranger 引擎的 tidymodels 分类随机森林在经典的 iris 数据集上比直接调用 ranger 慢得多(大约慢十倍)。这是为什么?
library(tidymodels)
library(ranger)
# Make example data: draw 600 rows from iris with replacement.
# seq_len() avoids the 1:n pitfall for n == 0, and TRUE (unlike T) cannot be
# reassigned elsewhere in the session.
data("iris")
mydata <- iris[sample(seq_len(nrow(iris)), 600, replace = TRUE), ]

# Recipe: model Species on every other column; no preprocessing steps added.
myrecipe <- recipe(Species ~ ., data = mydata)

# Random forest specification (300 trees, mtry = 3, min_n = 1) for
# classification, fitted through the ranger engine.
myRF <- rand_forest(trees = 300, mtry = 3, min_n = 1) %>%
  set_mode("classification") %>%
  set_engine("ranger")

# Workflow bundling the model specification with the recipe.
myworkflow <- workflow() %>%
  add_model(myRF) %>%
  add_recipe(myrecipe)
# Compare base ranger and tidy setup by wall-clock time.
#
# NOTE(review): parsnip's ranger engine appears to default to num.threads = 1,
# while a direct ranger() call uses its own (multi-threaded) default -- confirm
# with translate(myRF); that alone could explain much of the timing gap.

# Direct ranger fit with the same hyperparameters as the tidymodels spec.
time <- Sys.time()
fit_ranger <- ranger(
  Species ~ .,
  data = mydata,
  probability = TRUE,
  mtry = 3,
  num.trees = 300,
  min.node.size = 1
)
# `units` must be named: the third positional argument of difftime() is `tz`,
# so a bare "secs" would leave the units on "auto".
ranger_time <- difftime(Sys.time(), time, units = "secs")

# Same model fitted through the tidymodels workflow (recipe + parsnip).
time <- Sys.time()
fit_tidy <- myworkflow %>%
  fit(data = mydata)
tidy_time <- difftime(Sys.time(), time, units = "secs")

# Print both timings, now guaranteed to be in seconds.
tidy_time
ranger_time