嗨,我正在用 recipes 包为机器学习模型做特征工程。但是,使用 step_dummy 之后,生成的虚拟变量被当作数值变量,而不是因子。我担心在使用随机森林或其他树模型时,这可能会带来问题。怎样才能改变这一点?部分依赖图(PDP)把虚拟预测变量当作连续数值来显示,因此 X 轴上出现了 0.25、0.5 等刻度;而虚拟变量只有 0 和 1 两个取值,X 轴本应只显示 0 和 1。
library(modeldata)
library(recipes)
library(caret)
library(ranger)
library(ggplot2)
library(pdp)
# Load the okc data set and keep only fully observed rows so that neither the
# recipe nor the model has to deal with missing values.
data(okc)
okc <- okc[complete.cases(okc), ]

# Predictor-only recipe; step_dummy() expands `diet` into 0/1 indicator
# columns named diet_<level>.
rec <- recipe(~ diet + age + height, data = okc)
dummies <- rec %>% step_dummy(diet)
dummies <- prep(dummies, training = okc)
dummy_data <- bake(dummies, new_data = okc)
summary(dummy_data)

# Defensive: drop any row that still contains an NA after baking.
dummy_data <- na.omit(dummy_data)

# Keep at most the first 2000 rows. head() never pads the result with all-NA
# rows when fewer rows are available, which indexing with 1:2000 would do.
dummy_data <- head(dummy_data, 2000)

# Turn the 0/1 indicator used as the outcome into a two-level factor.
# Spelling out `levels` pins 0 -> "No" and 1 -> "Yes" regardless of which
# values happen to be present in the subset (relabelling by position fails
# when only one value occurs).
dummy_data$diet_strictly.anything <- factor(
  dummy_data$diet_strictly.anything,
  levels = c(0, 1),
  labels = c("No", "Yes")
)
# Resampling setup: 5-fold cross-validation that keeps the hold-out
# predictions and computes class probabilities, which twoClassSummary needs
# in order to report ROC, sensitivity and specificity.
myTrainingControl <- trainControl(
  method = "cv",
  number = 5,
  savePredictions = TRUE,
  classProbs = TRUE,
  summaryFunction = twoClassSummary,
  verboseIter = FALSE
)

# Random forest (ranger) on all remaining columns of dummy_data.
# `metric = "ROC"` names the statistic produced by twoClassSummary; without
# it caret looks for "Accuracy", warns that it is not in the result set and
# falls back to ROC anyway.
fit_rf <- caret::train(
  diet_strictly.anything ~ .,
  data = dummy_data,
  method = "ranger",
  metric = "ROC",
  tuneLength = 2,
  importance = "permutation",
  trControl = myTrainingControl
)
# Prediction wrapper with the two-argument signature (object, newdata).
# Asks the model for class probabilities and returns the second column as a
# plain vector, i.e. the probability of the second outcome level.
predict.function <- function(object, newdata) {
  class_probs <- predict(object, newdata, type = "prob")
  as.vector(class_probs[, 2])
}
# ICE curves for the 0/1 indicator diet_mostly.vegetarian.
# The indicator is left numeric: a tree split on a 0/1 column separates the
# same two groups as a split on a two-level factor, so the model itself is
# not affected. Only the plot needs care:
#   * pred.grid pins the evaluation points to the two values the indicator
#     can take, so the result does not depend on pdp's grid defaults;
#   * scale_x_continuous() restricts the axis ticks to 0 and 1 instead of the
#     intermediate breaks (0.25, 0.5, ...) a continuous axis picks by default.
plt_ICE <- autoplot(
  pdp::partial(
    fit_rf,
    pred.var = "diet_mostly.vegetarian",
    pred.grid = data.frame(diet_mostly.vegetarian = c(0, 1)),
    pred.fun = predict.function,
    train = dummy_data
  ),
  alpha = 0.1
) +
  scale_x_continuous(breaks = c(0, 1))
plt_ICE