0

我有如下几行代码。

library(mlr3)
library(mlr3pipelines)
library(mlr3extralearners)
library(DALEX)
library(DALEXtra)
library(tidyverse)

# Pull the German credit data and keep the target plus three predictors.
data <- tsk("german_credit")$data()
data <- data[, c("credit_risk", "amount", "purpose", "age")]

# Wrap the reduced table in a classification task predicting `credit_risk`.
task <- TaskClassif$new(
  id      = "german_credit",
  backend = data,
  target  = "credit_risk"
)

# Chain the imputation, factor-fixing and impact-encoding pipe operators in
# front of the LightGBM classifier.
# NOTE(review): no predict_type is set on the learner here; confirm whether
# probability predictions are wanted by the downstream explainer.
g <-
  po("imputemedian") %>>%
  po("imputeoor") %>>%
  po("fixfactors") %>>%
  po("encodeimpact") %>>%
  lrn("classif.lightgbm")

# Turn the graph into a single learner object and fit it on the task.
gl <- GraphLearner$new(g)
gl$train(task)

接下来对预测结果进行分解(break-down),以评估每个变量的贡献:

# Build the DALEX explainer for the trained graph learner.
# `data` holds the feature columns only: DALEX expects the target to be
# supplied separately through `y`, and leaving it in `data` would make every
# decomposition treat `credit_risk` as one more explanatory variable.
# `y` is 1 for observations labelled "bad" and 0 otherwise.
lgbm_explain <- explain_mlr3(
  gl,
  data     = task$data(cols = task$feature_names),
  y        = as.numeric(task$truth() == "bad"),
  label    = "Lightgbm",
  colorize = FALSE
)

# Sanity check: decompose the prediction for the first observation only
# and draw the resulting plot.
newdata <- data[1L, ]
lgbm_predict_part <- predict_parts(
  explainer       = lgbm_explain,
  new_observation = newdata
)
plot(lgbm_predict_part)

我想对数据中的每一行都使用 predict_parts。我尝试用下面这个函数在循环中逐行调用它,但运行得非常慢。

#' Decompose the prediction for one row of a data set
#'
#' Selects row `i` of `data` and passes it to `predict_parts()` as the new
#' observation.
#'
#' @param data Data frame containing the observations to explain.
#' @param i Row position handed to `slice()`.
#' @param explainer Explainer object to use. Defaults to the `lgbm_explain`
#'   object, looked up when the function is called, so existing calls that
#'   supply only `data` and `i` keep working.
#' @param ... Further arguments forwarded to `predict_parts()`.
#' @return The object returned by `predict_parts()` for the selected row.
fnc_predict_parts <- function(data, i, explainer = lgbm_explain, ...) {
  newdata <- slice(data, i)
  predict_parts(explainer, new_observation = newdata, ...)
}


# Apply the per-row decomposition to every row position of `data` and
# row-bind the results; `.id` records each input's position in an "id" column.
list_pred_parts <- map_dfr(
  seq_len(nrow(data)),
  fnc_predict_parts,
  data = data,
  .id  = "id"
)

请问,如何并行运行 predict_parts?或者,有没有什么方法可以一次性对整个数据集运行?

4

0 回答 0