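这个分析的最后一步出现了错误。在 h2o 模型上运行 explain() 函数时,我收到以下错误:"Error: All permutations have no similarity to the original observation. Try setting bin_continuous to TRUE and/or increase kernel_size"(所有排列与原始观测值都没有相似性;请尝试将 bin_continuous 设置为 TRUE 和/或增大 kernel_size)。错误信息中的两个建议我都已经试过:如果把 bin_continuous 改为 TRUE,lime() 函数本身就无法运行;换用其他核大小也同样报错。
关于如何解决这个问题,从而能够用 plot_features() 函数得到结果,大家有什么想法吗?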
library(readxl)
library(httr)
library(dplyr)
library(h2o)
library(lime)
# Download the HR attrition workbook into a temporary file.
# The workbook is an .xlsx file and is read with read_xlsx(), so the temp
# file gets the matching extension. The path is created up front instead of
# being assigned inside the call's argument list.
tf <- tempfile(fileext = ".xlsx")
response <- GET(
  "https://community.watsonanalytics.com/wp-content/uploads/2015/03/WA_FnUseC_-HR-Employee-Attrition.xlsx",
  write_disk(tf)
)
# Stop here with the HTTP status if the download failed, rather than letting
# read_xlsx() fail later on an error page saved to disk.
stop_for_status(response)

hr_data_raw <- read_xlsx(tf)

# Prepare the modelling table:
#   * character columns become factors,
#   * Attrition (the response) is moved to the first column,
#   * columns holding a single distinct value are dropped.
# NOTE(review): constant columns carry no signal, and zero-variance
# predictors are a plausible cause of the lime failures reported at the end
# of this script (standard deviation of 0 when scaling, duplicate quantile
# breaks when binning) -- confirm against the installed lime version.
hr_data <- hr_data_raw %>%
  mutate_if(is.character, as.factor) %>%
  select(Attrition, everything()) %>%
  select_if(function(col) n_distinct(col) > 1)
# Initialise the H2O session and turn off progress output.
h2o.init()
h2o.no_progress()

# Convert the prepared data frame into an H2O frame.
hr_data_h2o <- as.h2o(hr_data)

# Split into three frames (70% / 15% / remaining 15%) with a fixed seed,
# and register each part under its own key.
split_h2o <- h2o.splitFrame(hr_data_h2o, ratios = c(0.7, 0.15), seed = 1234)
train_h2o <- h2o.assign(split_h2o[[1]], "train") # 70%
valid_h2o <- h2o.assign(split_h2o[[2]], "valid") # 15%
test_h2o <- h2o.assign(split_h2o[[3]], "test") # 15%

# Response column, and every other column as a predictor.
y <- "Attrition"
x <- setdiff(names(train_h2o), y)

# Run AutoML for at most 30 seconds; the test frame ranks the leaderboard.
automl_models_h2o <- h2o.automl(
  x = x,
  y = y,
  training_frame = train_h2o,
  validation_frame = valid_h2o,
  leaderboard_frame = test_h2o,
  max_runtime_secs = 30
)

# Keep the top-ranked model for the explanation step.
automl_leader <- automl_models_h2o@leader
# Build a lime explainer from the training predictors (column 1 is dropped)
# and the AutoML leader. `FALSE` is spelled out because `F` is an ordinary,
# reassignable variable.
explainer <- lime::lime(
  as.data.frame(train_h2o[, -1]),
  model = automl_leader,
  bin_continuous = FALSE
)

# Explain the first 10 test rows: one label per case, four features each.
explanation <- lime::explain(
  as.data.frame(test_h2o[1:10, -1]),
  explainer = explainer,
  n_labels = 1,
  n_features = 4
)
# NOTE: this call has been observed to stop with
#   Error: All permutations have no similarity to the original observation.
#   Try setting bin_continuous to TRUE and/or increase kernel_size
# The `kernel_size` in that message corresponds to the `kernel_width`
# argument of lime::explain().
# NOTE(review): if the error appears, check the predictors for zero-variance
# (single-valued) columns -- presumably these break lime's scaling/binning;
# verify against the installed lime version.

# Plot the feature weights per explained case (needs a successful explain()).
plot_features(explanation)