我运行下面的代码。如果我不对重采样进行实例化(即像下面代码中那样把 `resampling$instantiate(task)` 这一行注释掉),三个基准实验的比较结果就会各不相同,关于哪个学习器表现更好的结论也可能随之改变。
我该如何解决这个问题?一种方法可能是对大量重采样的结果取平均。我可以自己为此编写代码,但也许在调用 `benchmark()` 时已经有现成的选项可以做到这一点?
# 20-fold cross-validation shared by all three benchmark designs.
resampling <- rsmp("cv", folds = 20)
# Toggle: uncomment the next line to instantiate the resampling up front.
# resampling$instantiate(task) # results below will (and shall) differ, if instantiation is not performed here

# Build one benchmark design: both kNN graph learners (with and without PCA)
# evaluated on `task` using the shared `resampling` object.
build_design <- function() {
  benchmark_grid(
    tasks = task,
    learners = list(glrn_knn_pca, glrn_knn_nopca),
    resamplings = resampling
  )
}

# Three designs built from identical inputs, created before any benchmark runs.
design <- build_design()
design2 <- build_design()
design3 <- build_design()

# Run each design as its own benchmark experiment.
bmr <- benchmark(design)
bmr2 <- benchmark(design2)
bmr3 <- benchmark(design3)

# Print the aggregated AUC of every experiment for side-by-side comparison.
bmr$aggregate(msr("classif.auc"))
bmr2$aggregate(msr("classif.auc"))
bmr3$aggregate(msr("classif.auc"))