这是我的数据中的一个示例:
data
## q6 q7 q8 q9 q10 q11 q12 q13 q14 q15 q16 q17 q18 q19 q20 q21 q22 q23 w
## 1 1.73 54.43 2 5 5 1 1 1 1 1 1 2 3 2 2 2 2 2 0
## 2 1.50 51.26 2 5 1 1 1 1 1 1 1 1 1 1 1 2 1 1 0
## 3 1.90 66.68 1 5 1 1 1 1 1 1 3 NA NA NA NA NA 1 NA 0
## 4 NA NA 2 5 1 2 4 4 1 1 1 1 2 1 2 2 1 1 0
## 5 1.63 68.49 1 4 3 1 1 1 1 1 1 1 1 1 1 1 4 5 1
## 6 1.70 59.88 2 5 1 1 1 1 1 1 1 1 1 1 1 2 2 2 0
## 7 1.73 70.76 2 5 1 2 8 1 1 1 1 1 1 1 1 2 2 2 1
## 8 1.75 90.72 NA 5 1 NA NA 1 1 1 1 2 1 1 1 2 2 2 0
## 9 1.50 40.82 2 4 2 1 1 3 1 1 1 1 1 1 1 2 3 2 0
## 10 1.68 49.90 1 5 1 1 1 1 1 1 1 1 1 1 1 2 2 2 0
## 11 1.50 86.18 1 3 2 NA 6 4 1 1 2 8 5 1 1 1 NA NA 1
## 12 1.88 79.83 3 5 1 2 2 1 1 1 1 2 1 1 1 2 1 1 1
## 13 1.78 68.49 2 4 1 1 1 1 1 1 1 1 1 1 1 2 2 2 1
## 14 1.73 54.43 2 4 1 1 1 1 1 1 1 1 3 1 1 2 2 2 0
## 15 1.80 72.58 2 4 1 1 1 1 1 1 1 1 3 1 1 2 2 2 0
然后我交叉验证了这些数据:
xgb.cv(data=as.matrix(data),label=data$w, num_class=2, nrounds=20, nfold=5, eval_metric="merror", lambda=1, objective = "multi:softmax")
我的标签是“w”列。num_class 设为 2,因为“w”只有 0 和 1 两个类别。我的最终目标是构建一个分类器,使用 xgboost 从数据中预测标签 w。但是当我运行上面的 xgb.cv 时,它返回:
## [1] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [2] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [3] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [4] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [5] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [6] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [7] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [8] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [9] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [10] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [11] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [12] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [13] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [14] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [15] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [16] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [17] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [18] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [19] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
## [20] train-merror:0.000000+0.000000 test-merror:0.000000+0.000000
所有的训练和测试错误率(merror)都是 0。为什么?我该如何解决这个问题?
这是按要求提供的 dput(head(data)) 的输出:
## structure(list(q6 = c(1.73, 1.5, 1.9, NA, 1.63, 1.7), q7 = c(54.43,
## 51.26, 66.68, NA, 68.49, 59.88), q8 = c(2, 2, 1, 2, 1, 2), q9 = c(5,
## 5, 5, 5, 4, 5), q10 = c(5, 1, 1, 1, 3, 1), q11 = c(1, 1, 1, 2,
## 1, 1), q12 = c(1, 1, 1, 4, 1, 1), q13 = c(1, 1, 1, 4, 1, 1),
## q14 = c(1, 1, 1, 1, 1, 1), q15 = c(1, 1, 1, 1, 1, 1), q16 = c(1,
## 1, 3, 1, 1, 1), q17 = c(2, 1, NA, 1, 1, 1), q18 = c(3, 1,
## NA, 2, 1, 1), q19 = c(2, 1, NA, 1, 1, 1), q20 = c(2, 1, NA,
## 2, 1, 1), q21 = c(2, 2, NA, 2, 1, 2), q22 = c(2, 1, 1, 1,
## 4, 2), q23 = c(2, 1, NA, 1, 5, 2), q24 = c(1, 2, 1, 2, 1,
## 1), q25 = c(1, 2, 1, 2, 2, 1), q26 = c(2, 2, 1, 1, 1, 1),
## q27 = c(2, 2, 1, 2, 1, 1), q28 = c(2, 2, 2, 2, 1, 1), q29 = c(1,
## 1, NA, 1, 1, 3), q30 = c(1, 1, NA, 1, 1, 3), q31 = c(1, 2,
## NA, 1, 1, 1), q32 = c(6, 1, NA, 6, 6, 1), q33 = c(NA, 1,
## NA, 2, 5, 1), q34 = c(NA, 1, NA, 2, 4, 1), q35 = c(NA, 1,
## NA, 5, 5, 1), q36 = c(2, 1, NA, 3, 3, 1), q37 = c(1, 1, NA,
## 1, 1, 1), q38 = c(6, 1, NA, 4, 1, 1), q39 = c(1, 2, 2, 1,
## 1, 2), q40 = c(3, 1, NA, 2, 7, 1), q41 = c(6, 1, 2, 5, 6,
## 3), q42 = c(5, 1, 5, 5, 5, 6), q43 = c(1, 1, 1, 2, 2, 2),
## q44 = c(1, 1, 1, 2, 2, NA), q45 = c(1, 1, 1, 5, 7, 4), q46 = c(1,
## 1, 1, 6, 5, 7), q47 = c(7, 1, NA, 7, 7, 6), q48 = c(6, 1,
## 7, 5, 5, 6), q49 = c(4, 1, NA, 6, 1, 4), q50 = c(1, 1, 1,
## 2, 3, 1), q51 = c(1, 1, 1, 1, 1, 1), q52 = c(1, 1, 1, 1,
## 1, 1), q53 = c(1, 1, 1, 2, 3, 1), q54 = c(1, 1, 1, 1, 2,
## 1), q55 = c(1, 1, 1, 2, 1, 1), q56 = c(1, 1, 1, 1, 1, 1),
## q57 = c(1, 1, 1, 4, 4, 2), q58 = c(1, 1, 1, 1, 1, 1), q59 = c(1,
## 2, 2, 2, 1, 1), q60 = c(1, 2, 1, 1, 1, 1), q61 = c(7, 1,
## 2, 5, 6, 6), q62 = c(3, 1, 3, 5, 7, 5), q63 = c(3, 1, 3,
## 2, 4, 5), q64 = c(3, 1, 3, 3, 3, 2), q65 = c(2, 1, 2, 2,
## 2, 3), q66 = c(4, 1, NA, 4, 4, 2), q67 = c(2, 3, 3, 2, 3,
## 2), q68 = c(1, 1, 2, 1, 1, 1), q69 = c(2, 3, 3, 2, 3, 3),
## q70 = c(2, 4, 4, 2, 1, 1), q71 = c(3, 2, 3, 1, 3, 2), q72 = c(4,
## 4, 4, 2, 3, 2), q73 = c(1, 2, 1, 1, 1, 2), q74 = c(2, 2,
## 3, 2, 2, 2), q75 = c(2, 2, 2, 2, 2, 1), q76 = c(7, 2, 2,
## 2, 2, 1), q77 = c(3, 3, 4, 4, 2, 7), q78 = c(1, 2, 4, 2,
## 1, 3), q79 = c(4, 8, 6, 3, 1, 2), q80 = c(6, 4, 4, 3, 1,
## 4), q81 = c(5, NA, 1, 4, 2, 1), q82 = c(7, 1, 6, 5, 2, 7),
## q83 = c(1, 1, 1, 6, 1, 6), q84 = c(1, 1, 1, 2, 1, 2), q85 = c(2,
## 2, 1, 2, 2, 2), q86 = c(1, 1, NA, 1, 1, 1), q87 = c(2, 2,
## NA, 2, 2, 1), q88 = c(4, 5, 5, 3, 1, 2), q89 = c(4, 2, 2,
## 4, 2, 4), q90 = c(2, 1, NA, NA, 1, 2), q91 = c(1, 1, 1, 3,
## 3, 1), q92 = c(1, 1, 1, 2, 2, 5), q93 = c(4, 5, 7, 4, 7,
## 2), q94 = c(3, 3, 2, 2, 3, 2), q95 = c(1, 4, 1, 1, 1, 4),
## q96 = c(1, 1, 1, 1, 1, 1), q97 = c(1, 1, 3, 1, 2, 3), q98 = c(1,
## 2, 2, 1, 1, 1), q99 = c(1, 1, 1, 1, 1, 2), w = c(0, 0, 0,
## 0, 0, 0)), row.names = c(NA, 6L), class = "data.frame")