我已经针对分类问题运行了一个带有嵌套交叉验证(调整+性能测量)的基准实验,并且想创建校准图表。
如果我将基准结果对象传递给 generateCalibrationData,那么 plotCalibration 会做什么?是平均吗?如果有怎么办?
根据 ROC 曲线的 generateThreshVsPerfData ,使用 aggregate = FALSE 选项来理解折叠之间的可变性是否有意义?
为了响应@Zach 对可重现示例的请求,我(OP)编辑我的原始帖子如下:
编辑:可重现的示例
# Practice Data
library("mlr")
library("ROCR")
library(mlbench)
data(BreastCancer)
dim(BreastCancer)
levels(BreastCancer$Class)
head(BreastCancer)
BreastCancer <- BreastCancer[, -c(1, 6, 7)]
BreastCancer$Cl.thickness <- as.factor(unclass(BreastCancer$Cl.thickness))
BreastCancer$Cell.size <- as.factor(unclass(BreastCancer$Cell.size))
BreastCancer$Cell.shape <- as.factor(unclass(BreastCancer$Cell.shape))
BreastCancer$Marg.adhesion <- as.factor(unclass(BreastCancer$Marg.adhesion))
head(BreastCancer)
# Define Nested Cross-Validation Strategy
cv.inner <- makeResampleDesc("CV", iters = 2, stratify = TRUE)
cv.outer <- makeResampleDesc("CV", iters = 6, stratify = TRUE)
# Define Performance Measures
perf.measures <- list(auc, mmce)
# Create Task
bc.task <- makeClassifTask(id = "bc",
data = BreastCancer,
target = "Class",
positive = "malignant")
# Create Tuned KSVM Learner
ksvm <- makeLearner("classif.ksvm",
predict.type = "prob")
ksvm.ps <- makeParamSet(makeDiscreteParam("C", values = 2^(-2:2)),
makeDiscreteParam("sigma", values = 2^(-2:2)))
ksvm.ctrl <- makeTuneControlGrid()
ksvm.lrn = makeTuneWrapper(ksvm,
resampling = cv.inner,
measures = perf.measures,
par.set = ksvm.ps,
control = ksvm.ctrl,
show.info = FALSE)
# Create Tuned Random Forest Learner
rf <- makeLearner("classif.randomForest",
predict.type = "prob",
fix.factors.prediction = TRUE)
rf.ps <- makeParamSet(makeDiscreteParam("mtry", values = c(2, 3, 5)))
rf.ctrl <- makeTuneControlGrid()
rf.lrn = makeTuneWrapper(rf,
resampling = cv.inner,
measures = perf.measures,
par.set = rf.ps,
control = rf.ctrl,
show.info = FALSE)
# Run Cross-Validation Experiments
bc.lrns = list(ksvm.lrn, rf.lrn)
bc.bmr <- benchmark(learners = bc.lrns,
tasks = bc.task,
resampling = cv.outer,
measures = perf.measures,
show.info = FALSE)
# Calibration Charts
bc.cal <- generateCalibrationData(bc.bmr)
plotCalibration(bc.cal)
产生以下内容:
尝试取消聚合会导致:
> bc.cal <- generateCalibrationData(bc.bmr, aggregate = FALSE)
Error in generateCalibrationData(bc.bmr, aggregate = FALSE) :
unused argument (aggregate = FALSE)
>
> sessionInfo()
R version 3.2.3 (2015-12-10)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] mlbench_2.1-1 ROCR_1.0-7 gplots_3.0.1 mlr_2.9
[5] stringi_1.1.1 ParamHelpers_1.10 ggplot2_2.1.0 BBmisc_1.10
loaded via a namespace (and not attached):
[1] digest_0.6.9 htmltools_0.3.5 R6_2.2.0 splines_3.2.3
[5] scales_0.4.0 assertthat_0.1 grid_3.2.3 stringr_1.0.0
[9] bitops_1.0-6 checkmate_1.8.2 gdata_2.17.0 survival_2.38-3
[13] munsell_0.4.3 tibble_1.2 randomForest_4.6-12 httpuv_1.3.3
[17] parallelMap_1.3 mime_0.5 DBI_0.5-1 labeling_0.3
[21] chron_2.3-47 shiny_1.0.0 KernSmooth_2.23-15 plyr_1.8.4
[25] data.table_1.9.6 magrittr_1.5 reshape2_1.4.1 kernlab_0.9-25
[29] ggvis_0.4.3 caTools_1.17.1 gtable_0.2.0 colorspace_1.2-6
[33] tools_3.2.3 parallel_3.2.3 dplyr_0.5.0 xtable_1.8-2
[37] gtools_3.5.0 backports_1.0.4 Rcpp_0.12.4