0

我有一个模型:

# Fit a multi-class boosted-tree classifier with xgboost.
# Features are every column of `data` except the first; the label is
# `data$Ethnicity` with 8 classes.
# NOTE(review): `data[, -1]` assumes the label is the first column of `data`
# -- confirm, otherwise the label would also be used as a feature.
model <- xgboost(
  data = as.matrix(data[, -1]),
  label = data$Ethnicity,
  objective = "multi:softmax",
  num_class = 8,
  eval_metric = "merror",
  lambda = 1,
  nrounds = 50
)

数据是一个包含 94 个变量的矩阵,这些变量来自随机抽取的调查问卷问题;标签是 Ethnicity,它是一个取值为 0–7 的变量,用于编码种族/民族,即 0 到 7 中的每个数字各代表一个种族。

我已经找出了在预测中最重要的变量:

# Print the per-feature importance table (Gain, Cover, Frequency) for the
# fitted model.
xgb.importance(model = model)

##     Feature         Gain        Cover   Frequency
##  1:     q97 0.0924173556 0.0388402250 0.016981237
##  2:      q9 0.0603595554 0.0199381316 0.012749847
##  3:      q7 0.0456855077 0.0447756304 0.066922777
##  4:      q6 0.0436987577 0.0485072162 0.041311731
##  5:      q8 0.0319606309 0.0212999077 0.015199599
##  6:     q99 0.0276115402 0.0201090242 0.007961695
##  7:     q89 0.0245865711 0.0249913356 0.023829408
##  8:     q13 0.0197648132 0.0190748590 0.010912533
##  9:     q81 0.0194462208 0.0140010066 0.021880742
## 10:     q71 0.0192126872 0.0194684164 0.019709370

现在我卡住了,我的问题是:如何描述或展示这些变量与标签之间的关系?提前致谢(TIA)!

以下是来自 dput(head(data)) 的一些数据:

structure(list(r = c(2, 6, 4, 4, 4, 4), q6 = c(1.73, 1.5, 1.9, 
NA, 1.63, 1.7), q7 = c(54.43, 51.26, 66.68, NA, 68.49, 59.88), 
    q8 = c(2, 2, 1, 2, 1, 2), q9 = c(5, 5, 5, 5, 4, 5), q10 = c(5, 
    1, 1, 1, 3, 1), q11 = c(1, 1, 1, 2, 1, 1), q12 = c(1, 1, 
    1, 4, 1, 1), q13 = c(1, 1, 1, 4, 1, 1), q14 = c(1, 1, 1, 
    1, 1, 1), q15 = c(1, 1, 1, 1, 1, 1), q16 = c(1, 1, 3, 1, 
    1, 1), q17 = c(2, 1, NA, 1, 1, 1), q18 = c(3, 1, NA, 2, 1, 
    1), q19 = c(2, 1, NA, 1, 1, 1), q20 = c(2, 1, NA, 2, 1, 1
    ), q21 = c(2, 2, NA, 2, 1, 2), q22 = c(2, 1, 1, 1, 4, 2), 
    q23 = c(2, 1, NA, 1, 5, 2), q24 = c(1, 2, 1, 2, 1, 1), q25 = c(1, 
    2, 1, 2, 2, 1), q26 = c(2, 2, 1, 1, 1, 1), q27 = c(2, 2, 
    1, 2, 1, 1), q28 = c(2, 2, 2, 2, 1, 1), q29 = c(1, 1, NA, 
    1, 1, 3), q30 = c(1, 1, NA, 1, 1, 3), q31 = c(1, 2, NA, 1, 
    1, 1), q32 = c(6, 1, NA, 6, 6, 1), q33 = c(NA, 1, NA, 2, 
    5, 1), q34 = c(NA, 1, NA, 2, 4, 1), q35 = c(NA, 1, NA, 5, 
    5, 1), q36 = c(2, 1, NA, 3, 3, 1), q37 = c(1, 1, NA, 1, 1, 
    1), q38 = c(6, 1, NA, 4, 1, 1), q39 = c(1, 2, 2, 1, 1, 2), 
    q40 = c(3, 1, NA, 2, 7, 1), q41 = c(6, 1, 2, 5, 6, 3), q42 = c(5, 
    1, 5, 5, 5, 6), q43 = c(1, 1, 1, 2, 2, 2), q44 = c(1, 1, 
    1, 2, 2, NA), q45 = c(1, 1, 1, 5, 7, 4), q46 = c(1, 1, 1, 
    6, 5, 7), q47 = c(7, 1, NA, 7, 7, 6), q48 = c(6, 1, 7, 5, 
    5, 6), q49 = c(4, 1, NA, 6, 1, 4), q50 = c(1, 1, 1, 2, 3, 
    1), q51 = c(1, 1, 1, 1, 1, 1), q52 = c(1, 1, 1, 1, 1, 1), 
    q53 = c(1, 1, 1, 2, 3, 1), q54 = c(1, 1, 1, 1, 2, 1), q55 = c(1, 
    1, 1, 2, 1, 1), q56 = c(1, 1, 1, 1, 1, 1), q57 = c(1, 1, 
    1, 4, 4, 2), q58 = c(1, 1, 1, 1, 1, 1), q59 = c(1, 2, 2, 
    2, 1, 1), q60 = c(1, 2, 1, 1, 1, 1), q61 = c(7, 1, 2, 5, 
    6, 6), q62 = c(3, 1, 3, 5, 7, 5), q63 = c(3, 1, 3, 2, 4, 
    5), q64 = c(3, 1, 3, 3, 3, 2), q65 = c(2, 1, 2, 2, 2, 3), 
    q66 = c(4, 1, NA, 4, 4, 2), q67 = c(2, 3, 3, 2, 3, 2), q68 = c(1, 
    1, 2, 1, 1, 1), q69 = c(2, 3, 3, 2, 3, 3), q70 = c(2, 4, 
    4, 2, 1, 1), q71 = c(3, 2, 3, 1, 3, 2), q72 = c(4, 4, 4, 
    2, 3, 2), q73 = c(1, 2, 1, 1, 1, 2), q74 = c(2, 2, 3, 2, 
    2, 2), q75 = c(2, 2, 2, 2, 2, 1), q76 = c(7, 2, 2, 2, 2, 
    1), q77 = c(3, 3, 4, 4, 2, 7), q78 = c(1, 2, 4, 2, 1, 3), 
    q79 = c(4, 8, 6, 3, 1, 2), q80 = c(6, 4, 4, 3, 1, 4), q81 = c(5, 
    NA, 1, 4, 2, 1), q82 = c(7, 1, 6, 5, 2, 7), q83 = c(1, 1, 
    1, 6, 1, 6), q84 = c(1, 1, 1, 2, 1, 2), q85 = c(2, 2, 1, 
    2, 2, 2), q86 = c(1, 1, NA, 1, 1, 1), q87 = c(2, 2, NA, 2, 
    2, 1), q88 = c(4, 5, 5, 3, 1, 2), q89 = c(4, 2, 2, 4, 2, 
    4), q90 = c(2, 1, NA, NA, 1, 2), q91 = c(1, 1, 1, 3, 3, 1
    ), q92 = c(1, 1, 1, 2, 2, 5), q93 = c(4, 5, 7, 4, 7, 2), 
    q94 = c(3, 3, 2, 2, 3, 2), q95 = c(1, 4, 1, 1, 1, 4), q96 = c(1, 
    1, 1, 1, 1, 1), q97 = c(1, 1, 3, 1, 2, 3), q98 = c(1, 2, 
    2, 1, 1, 1), q99 = c(1, 1, 1, 1, 1, 2)), row.names = c(NA, 
6L), class = "data.frame")
4

0 回答 0