我的代码如下。
library(mlr3)
library(mlr3pipelines)
library(mlr3extralearners)
library(DALEX)
library(DALEXtra)
library(tidyverse)
# Load the German credit data and keep the target plus three predictors.
data <- tsk("german_credit")$data()
data <- data[, c("credit_risk", "amount", "purpose", "age")]

# Wrap the reduced table in a classification task predicting credit_risk.
task <- TaskClassif$new(
  id = "german_credit",
  backend = data,
  target = "credit_risk"
)

# Preprocessing graph (median imputation, out-of-range imputation,
# factor-level fixing, impact encoding) feeding a LightGBM classifier.
g <- po("imputemedian") %>>%
  po("imputeoor") %>>%
  po("fixfactors") %>>%
  po("encodeimpact") %>>%
  lrn("classif.lightgbm")

# Treat the whole graph as a single learner and fit it on the task.
gl <- GraphLearner$new(g)
gl$train(task)
接下来对预测进行分解,以评估每个变量的贡献:
# Build a DALEX explainer around the trained graph learner.
# The target is recoded as numeric 0/1, with "bad" mapped to 1.
lgbm_explain <- explain_mlr3(
  model = gl,
  data = task$data(),
  y = as.numeric(task$data()[["credit_risk"]] == "bad"),
  label = "Lightgbm",
  colorize = FALSE
)
# Sanity check: explain the prediction for the first observation and plot it.
newdata <- data[1, ]
lgbm_predict_part <- predict_parts(
  explainer = lgbm_explain,
  new_observation = newdata
)
plot(lgbm_predict_part)
我想对所有观测值使用 predict_parts。我尝试用下面的函数循环调用它,但运行得非常慢。
# Explain the model prediction for a single row of `data`.
#
# Arguments:
#   data      Data frame holding the observations to explain.
#   i         Row position in `data` of the observation to explain.
#   explainer DALEX explainer to use. Defaults to the `lgbm_explain` object
#             created earlier in this script, so existing calls keep working;
#             pass it explicitly when the function runs somewhere that global
#             is not visible (for example on a parallel worker).
#   ...       Further arguments forwarded to predict_parts().
#
# Returns the predict_parts() result for row `i`.
fnc_predict_parts <- function(data, i, explainer = lgbm_explain, ...) {
  # Namespace-qualified so the call does not depend on the search path.
  newdata <- dplyr::slice(data, i)
  predict_parts(explainer, new_observation = newdata, ...)
}
# Explain every row in turn and stack the per-row results into one data frame;
# the "id" column records which element of the row sequence each part came from.
list_pred_parts <- map_dfr(
  seq_len(nrow(data)),
  fnc_predict_parts,
  data = data,
  .id = "id"
)
请问,如何并行运行 predict_parts?或者有没有什么方法可以一次性对整个数据集运行?