我有一个模型:
# Train a multiclass XGBoost classifier: every column except the first is used
# as a feature, and `Ethnicity` is the label (num_class = 8, "merror" metric).
# NOTE(review): the dput(head(data)) sample further down has no `Ethnicity`
# column (its first column is `r`) -- confirm the label column name.
model <- xgboost(
  data = as.matrix(data[, -1]),
  label = data$Ethnicity,
  num_class = 8,
  nrounds = 50,
  objective = "multi:softmax",
  lambda = 1,
  eval_metric = "merror"
)
数据是一个由 94 个变量组成的矩阵,每个变量对应一道随机调查问题;标签是 Ethnicity,它是一个取值为 0–7 的分类变量,用来编码种族/民族,即 0 到 7 的每个数字各代表一个种族。
我找出了在预测中最重要的变量:
# Print the feature-importance table (Feature, Gain, Cover, Frequency) for the
# fitted model.
xgb.importance(model = model)
## Feature Gain Cover Frequency
## 1: q97 0.0924173556 0.0388402250 0.016981237
## 2: q9 0.0603595554 0.0199381316 0.012749847
## 3: q7 0.0456855077 0.0447756304 0.066922777
## 4: q6 0.0436987577 0.0485072162 0.041311731
## 5: q8 0.0319606309 0.0212999077 0.015199599
## 6: q99 0.0276115402 0.0201090242 0.007961695
## 7: q89 0.0245865711 0.0249913356 0.023829408
## 8: q13 0.0197648132 0.0190748590 0.010912533
## 9: q81 0.0194462208 0.0140010066 0.021880742
## 10: q71 0.0192126872 0.0194684164 0.019709370
现在我卡住了。我的问题是:如何描述或展示这些变量与标签之间的关系?提前致谢(TIA)!
以下是来自 dput(head(data)) 的一些数据:
structure(list(r = c(2, 6, 4, 4, 4, 4), q6 = c(1.73, 1.5, 1.9,
NA, 1.63, 1.7), q7 = c(54.43, 51.26, 66.68, NA, 68.49, 59.88),
q8 = c(2, 2, 1, 2, 1, 2), q9 = c(5, 5, 5, 5, 4, 5), q10 = c(5,
1, 1, 1, 3, 1), q11 = c(1, 1, 1, 2, 1, 1), q12 = c(1, 1,
1, 4, 1, 1), q13 = c(1, 1, 1, 4, 1, 1), q14 = c(1, 1, 1,
1, 1, 1), q15 = c(1, 1, 1, 1, 1, 1), q16 = c(1, 1, 3, 1,
1, 1), q17 = c(2, 1, NA, 1, 1, 1), q18 = c(3, 1, NA, 2, 1,
1), q19 = c(2, 1, NA, 1, 1, 1), q20 = c(2, 1, NA, 2, 1, 1
), q21 = c(2, 2, NA, 2, 1, 2), q22 = c(2, 1, 1, 1, 4, 2),
q23 = c(2, 1, NA, 1, 5, 2), q24 = c(1, 2, 1, 2, 1, 1), q25 = c(1,
2, 1, 2, 2, 1), q26 = c(2, 2, 1, 1, 1, 1), q27 = c(2, 2,
1, 2, 1, 1), q28 = c(2, 2, 2, 2, 1, 1), q29 = c(1, 1, NA,
1, 1, 3), q30 = c(1, 1, NA, 1, 1, 3), q31 = c(1, 2, NA, 1,
1, 1), q32 = c(6, 1, NA, 6, 6, 1), q33 = c(NA, 1, NA, 2,
5, 1), q34 = c(NA, 1, NA, 2, 4, 1), q35 = c(NA, 1, NA, 5,
5, 1), q36 = c(2, 1, NA, 3, 3, 1), q37 = c(1, 1, NA, 1, 1,
1), q38 = c(6, 1, NA, 4, 1, 1), q39 = c(1, 2, 2, 1, 1, 2),
q40 = c(3, 1, NA, 2, 7, 1), q41 = c(6, 1, 2, 5, 6, 3), q42 = c(5,
1, 5, 5, 5, 6), q43 = c(1, 1, 1, 2, 2, 2), q44 = c(1, 1,
1, 2, 2, NA), q45 = c(1, 1, 1, 5, 7, 4), q46 = c(1, 1, 1,
6, 5, 7), q47 = c(7, 1, NA, 7, 7, 6), q48 = c(6, 1, 7, 5,
5, 6), q49 = c(4, 1, NA, 6, 1, 4), q50 = c(1, 1, 1, 2, 3,
1), q51 = c(1, 1, 1, 1, 1, 1), q52 = c(1, 1, 1, 1, 1, 1),
q53 = c(1, 1, 1, 2, 3, 1), q54 = c(1, 1, 1, 1, 2, 1), q55 = c(1,
1, 1, 2, 1, 1), q56 = c(1, 1, 1, 1, 1, 1), q57 = c(1, 1,
1, 4, 4, 2), q58 = c(1, 1, 1, 1, 1, 1), q59 = c(1, 2, 2,
2, 1, 1), q60 = c(1, 2, 1, 1, 1, 1), q61 = c(7, 1, 2, 5,
6, 6), q62 = c(3, 1, 3, 5, 7, 5), q63 = c(3, 1, 3, 2, 4,
5), q64 = c(3, 1, 3, 3, 3, 2), q65 = c(2, 1, 2, 2, 2, 3),
q66 = c(4, 1, NA, 4, 4, 2), q67 = c(2, 3, 3, 2, 3, 2), q68 = c(1,
1, 2, 1, 1, 1), q69 = c(2, 3, 3, 2, 3, 3), q70 = c(2, 4,
4, 2, 1, 1), q71 = c(3, 2, 3, 1, 3, 2), q72 = c(4, 4, 4,
2, 3, 2), q73 = c(1, 2, 1, 1, 1, 2), q74 = c(2, 2, 3, 2,
2, 2), q75 = c(2, 2, 2, 2, 2, 1), q76 = c(7, 2, 2, 2, 2,
1), q77 = c(3, 3, 4, 4, 2, 7), q78 = c(1, 2, 4, 2, 1, 3),
q79 = c(4, 8, 6, 3, 1, 2), q80 = c(6, 4, 4, 3, 1, 4), q81 = c(5,
NA, 1, 4, 2, 1), q82 = c(7, 1, 6, 5, 2, 7), q83 = c(1, 1,
1, 6, 1, 6), q84 = c(1, 1, 1, 2, 1, 2), q85 = c(2, 2, 1,
2, 2, 2), q86 = c(1, 1, NA, 1, 1, 1), q87 = c(2, 2, NA, 2,
2, 1), q88 = c(4, 5, 5, 3, 1, 2), q89 = c(4, 2, 2, 4, 2,
4), q90 = c(2, 1, NA, NA, 1, 2), q91 = c(1, 1, 1, 3, 3, 1
), q92 = c(1, 1, 1, 2, 2, 5), q93 = c(4, 5, 7, 4, 7, 2),
q94 = c(3, 3, 2, 2, 3, 2), q95 = c(1, 4, 1, 1, 1, 4), q96 = c(1,
1, 1, 1, 1, 1), q97 = c(1, 1, 3, 1, 2, 3), q98 = c(1, 2,
2, 1, 1, 1), q99 = c(1, 1, 1, 1, 1, 2)), row.names = c(NA,
6L), class = "data.frame")