
I love MLR! In the code below I compare the performance of four classifiers. When I run it on the Pima Indians Diabetes data, I get some strange errors:

library(mlbench)
library(caret)
library(randomForest)
data(PimaIndiansDiabetes)
data2<-data
## Define the task
Class.task = makeClassifTask(id = "USUBJID", data = data2, target = "Class", positive ="B")

Class.task = makeClassifTask( data = PimaIndiansDiabetes, target = "diabetes", positive ="pos")


fv = generateFilterValuesData(Class.task, method = "mrmr")

plotFilterValues(fv)

filtered.task = filterFeatures(Class.task, fval = fv, threshold = -.2)

#filtered.task = Class.task

n = getTaskSize(filtered.task)
train.set = sample(n, size = round(2/3 * n))
test.set = setdiff(seq_len(n), train.set)

lrn1 = makeLearner("classif.lda", predict.type = "prob")
mod1 = train(lrn1, filtered.task, subset = train.set)
pred1 = predict(mod1, task = filtered.task, subset = test.set)


lrn2 = makeLearner("classif.ksvm", predict.type = "prob")
mod2 = train(lrn2, filtered.task, subset = train.set)
pred2 = predict(mod2, task = filtered.task, subset = test.set)

lrn3 = makeLearner("classif.randomForest", predict.type = "prob")
mod3 = train(lrn3, Class.task, subset = train.set)
pred3 = predict(mod3, task = Class.task, subset = test.set)

lrn5 = makeLearner("classif.xgboost", predict.type = "prob")
mod5 = train(lrn5, Class.task, subset = train.set)
pred5 = predict(mod5, task = Class.task, subset = test.set)

### Tune wrapper for ksvm
rdesc.inner = makeResampleDesc("Holdout")
ms = list(auc, mmce)
ps = makeParamSet(
  makeDiscreteParam("C", 2^(-1:1))
)
ctrl = makeTuneControlGrid()
lrn2 = makeTuneWrapper(lrn2, rdesc.inner,ms, ps,  ctrl, show.info = FALSE)

lrns = list(lrn1, lrn2,lrn3,lrn5)
rdesc.outer = makeResampleDesc("CV", iters = 5)

bmr = benchmark(lrns, tasks = filtered.task, resampling = rdesc.outer, measures = ms, show.info = FALSE)
bmr

The errors I get are:

Error in unique.default(x, nmax = nmax) : 
  unique() applies only to vectors
> pred1 = predict(mod1, task = filtered.task, subset = test.set)
Error in predict(mod1, task = filtered.task, subset = test.set) : 
  object 'mod1' not found
> lrn2 = makeLearner("classif.ksvm", predict.type = "prob")
> mod2 = train(lrn2, filtered.task, subset = train.set)
Error in unique.default(x, nmax = nmax) : 
  unique() applies only to vectors
> pred2 = predict(mod2, task = filtered.task, subset = test.set)
Error in predict(mod2, task = filtered.task, subset = test.set) : 
  object 'mod2' not found
> lrn3 = makeLearner("classif.randomForest", predict.type = "prob")
> mod3 = train(lrn3, Class.task, subset = train.set)
Error in unique.default(x, nmax = nmax) : 
  unique() applies only to vectors
> pred3 = predict(mod3, task = Class.task, subset = test.set)
Error in predict(mod3, task = Class.task, subset = test.set) : 
  object 'mod3' not found
> 
> lrn5 = makeLearner("classif.xgboost", predict.type = "prob")
> mod5 = train(lrn5, Class.task, subset = train.set)
Error in unique.default(x, nmax = nmax) : 
  unique() applies only to vectors
> pred5 = predict(mod5, task = Class.task, subset = test.set)
Error in predict(mod5, task = Class.task, subset = test.set) :
I do eventually get performance results. Any ideas about what I'm doing wrong? Thanks!


1 Answer


The problem is that you have both mlr and caret loaded: both packages export a train() function, and one masks the other depending on the order in which you load them. In your session, caret's train() is being called on an mlr learner, which produces the unique() error. Load the mlr package last (and you don't need caret here at all).
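If you really do need caret in the same session, here is a minimal sketch (reusing lrn1, filtered.task and train.set from your script) of how to check which train() currently wins and how to call mlr's version explicitly:

# Which attached packages define train(), in masking order?
find("train")
# e.g. "package:caret" "package:mlr"  -> caret::train masks mlr::train

# Either drop caret from the search path ...
detach("package:caret", unload = TRUE)

# ... or qualify the call so mlr's train() is used regardless of load order:
mod1 = mlr::train(lrn1, filtered.task, subset = train.set)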

Edit: full working code

library(mlbench)
library(mlr)

data(PimaIndiansDiabetes)

Class.task = makeClassifTask( data = PimaIndiansDiabetes, target = "diabetes", positive ="pos")


fv = generateFilterValuesData(Class.task, method = "mrmr")

plotFilterValues(fv)

filtered.task = filterFeatures(Class.task, fval = fv, threshold = -.2)

#filtered.task = Class.task

n = getTaskSize(filtered.task)
train.set = sample(n, size = round(2/3 * n))
test.set = setdiff(seq_len(n), train.set)

lrn1 = makeLearner("classif.lda", predict.type = "prob")
mod1 = train(lrn1, filtered.task, subset = train.set)
pred1 = predict(mod1, task = filtered.task, subset = test.set)


lrn2 = makeLearner("classif.ksvm", predict.type = "prob")
mod2 = train(lrn2, filtered.task, subset = train.set)
pred2 = predict(mod2, task = filtered.task, subset = test.set)

lrn3 = makeLearner("classif.randomForest", predict.type = "prob")
mod3 = train(lrn3, Class.task, subset = train.set)
pred3 = predict(mod3, task = Class.task, subset = test.set)

lrn5 = makeLearner("classif.xgboost", predict.type = "prob")
mod5 = train(lrn5, Class.task, subset = train.set)
pred5 = predict(mod5, task = Class.task, subset = test.set)

### Tune wrapper for ksvm
rdesc.inner = makeResampleDesc("Holdout")
ms = list(auc, mmce)
ps = makeParamSet(
  makeDiscreteParam("C", 2^(-1:1))
)
ctrl = makeTuneControlGrid()
lrn2 = makeTuneWrapper(lrn2, rdesc.inner,ms, ps,  ctrl, show.info = FALSE)

lrns = list(lrn1, lrn2,lrn3,lrn5)
rdesc.outer = makeResampleDesc("CV", iters = 5)

bmr = benchmark(lrns, tasks = filtered.task, resampling = rdesc.outer, measures = ms, show.info = FALSE)
bmr
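Once the benchmark finishes, you can pull out the aggregated results or plot them; a quick sketch with mlr's built-in helpers:

# Aggregated AUC / mmce per learner as a data.frame
getBMRAggrPerformances(bmr, as.df = TRUE)

# Box plots of the per-fold performances across learners
plotBMRBoxplots(bmr, measure = auc)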
answered 2016-07-15T17:45:24.840