由于 caret-ml 中有一些方法没有内置的特征选择,所以我正在试验 rfe()。
有人可以解释一下,除了训练时间和特征排名列表之外,train() 和 rfe() 之间的区别是什么?
是否有些事情只有 train() 才能做,反之亦然?
这是我的例子来说明这个问题:
# worker functions
#' Run recursive feature elimination (RFE) around an SVM and score it.
#'
#' Calls `rfe()` with `caretFuncs`, forwarding `method = "svmRadial"`,
#' `metric = "Accuracy"` and `trControl = trainctrl` to the model fits, then
#' evaluates the selected model on the hold-out data.
#'
#' Reads the globals `training`, `testing` and `subsets`.
#'
#' @param trainctrl A `trainControl()` object forwarded as `trControl`.
#' @return The `confusionMatrix()` result for the predictions on `testing`.
useRFE <- function(trainctrl) {
  # Outer resampling for RFE: plain 5-fold CV. `repeats` is documented as
  # applying to repeated k-fold CV only, so it is not passed with "cv".
  control <- rfeControl(
    functions = caretFuncs,
    method = "cv",
    number = 5,
    returnResamp = "final",
    verbose = TRUE
  )

  # Select the predictors by name. The earlier `1:ncol(iris) - 1` parsed as
  # `(1:ncol(iris)) - 1`, i.e. 0:4, and only worked because R drops index 0.
  predictorNames <- setdiff(names(training), "Species")
  predictors <- training[, predictorNames, drop = FALSE]

  # A candidate subset must contain at least one feature.
  candidateSizes <- subsets[subsets >= 1]

  # run the RFE algorithm
  resultsRFE <- rfe(
    predictors,
    training$Species,
    sizes = candidateSizes,
    rfeControl = control,
    method = "svmRadial",
    metric = "Accuracy",
    trControl = trainctrl
  )

  # test
  predicted <- predict(resultsRFE, testing)
  # With class probabilities enabled the prediction is a data frame whose
  # `pred` column holds the classes; otherwise it is expected to be the class
  # vector itself (NOTE(review): confirm against caretFuncs$pred).
  predictedClasses <- if (is.data.frame(predicted)) {
    predicted$pred
  } else {
    predicted
  }

  # The original author noted "Balanced Accuracy = 100%" for one run; the
  # value depends on the random train/test partition.
  confusionMatrix(data = predictedClasses, reference = testing$Species)
}
#' Fit a radial SVM directly with `train()` and score it on the hold-out data.
#'
#' No feature elimination is performed here: the model is fitted on every
#' column of the global `training` other than `Species`, then evaluated
#' against the global `testing`.
#'
#' @param trainctrl A `trainControl()` object passed to `train()` as
#'   `trControl`.
#' @return The `confusionMatrix()` result for the predictions on `testing`.
useTrain <- function(trainctrl) {
  # Fit the model on the full predictor set.
  svmFit <- train(
    Species ~ .,
    data = training,
    trControl = trainctrl,
    metric = "Accuracy",
    method = "svmRadial"
  )

  # Predict the hold-out classes and compare them with the true labels.
  # (The original author noted "Balanced Accuracy = 100%" for one run.)
  holdoutPred <- predict(svmFit, testing)
  confusionMatrix(data = holdoutPred, reference = testing$Species)
}
# load library
library(caret)
# load iris data
data(iris)

# Fix the RNG so the train/test partition and the CV folds are reproducible.
set.seed(42)

# create datasets
inTrain <- createDataPartition(
  y = iris$Species,
  p = 0.75,
  list = FALSE
)
## The output is a set of integers for the rows of iris
## that belong in the training set.
# Plain `<-` is sufficient at top level; `<<-` is meant for enclosing scopes.
training <- iris[inTrain, ]
testing <- iris[-inTrain, ]

# select count of features: 1 .. number of predictors.
# The earlier `c(1:ncol(iris)-1)` parsed as `(1:ncol(iris)) - 1` and produced
# 0:4, which includes a meaningless subset size of 0.
subsets <- seq_len(ncol(iris) - 1)

# set controls
# Plain 5-fold CV. `repeats` is omitted because it only applies to
# method = "repeatedcv" (recent caret versions warn when it is given with
# "cv"); switch the method and add `repeats = 5` if repeated CV is wanted.
# The progress flag of trainControl() is `verboseIter`; the earlier
# `verbose = TRUE` relied on partial argument matching.
trainctrl <- trainControl(
  classProbs = TRUE,
  method = "cv",
  number = 5,
  verboseIter = TRUE
)

# compare
# The figures below were recorded from an earlier, unseeded run and may not
# match the seeded results exactly.
ptm <- proc.time()
useTrain(trainctrl = trainctrl)
proc.time() - ptm
# Accuracy : 0.9167
#    user  system elapsed
#    1.54    0.06   10.27
ptm <- proc.time()
useRFE(trainctrl = trainctrl)
proc.time() - ptm
# Accuracy : 0.9167
#    user  system elapsed
#    0.86    0.04   24.44