1

我正在尝试在 R 中使用“fscaret”函数进行特征选择,但它抛出了一个我查不到任何相关信息的错误。错误信息如下:

Error in if (mean(x[i, -i]) > mean(x[-j, j])) { : 
  missing value where TRUE/FALSE needed

dput(head(data, 2)) 的输出如下所示:

structure(list(lvvalue = c(65, 55), lvtest_ECHO = c(1, 0), lvtest_MUGA = c(0, 
1), lvtest_Not_Done = c(0, 0), bilirubin = c(0.4, 11), alat = c(26, 
34), asat = c(7, 18), alkaline_phosphatase = c(61, 58), creatinine = c(0.8, 
52), age = c(43L, 39L), sex = c(1, 1), lumpectomy = c(1L, 0L), 
    mastectomy = 0:1, other_surg = c(0L, 0L), quad_seg = c(1L, 
    0L), resaln = c(21L, 25L), posaln = c(3L, 18L), histype_idc = c(1, 
    1), histype_ilc = c(0, 0), histype_other = c(0, 0), ncgr_cba = c(0, 
    0), ncgr_md = c(0, 0), ncgr_pd = c(1, 1), ncgr_un = c(0, 
    0), ncgr_wd = c(0, 0), ptsize = c(3, 4), tnm_val = c(54L, 
    54L), ptsite_Left = c(0, 0), ptsite_Right = c(1, 1), evidis_no = c(NA_real_, 
    NA_real_), evidis_yes = c(NA_real_, NA_real_), fishresNegative = c(1, 
    1), fishresPositive = c(0, 0), wbc = c(8.6, 7.2), platelets = c(340, 
    261), hemoglobin = c(13.7, 12.6), neutophils = c(4.988, 4.76
    ), penabnAbnormal = c(0, 0), penabnNormal = c(1, 1), penabnNot_Done = c(0, 
    0), q1 = c(NA, 2), q2 = c(NA, 1), q3 = c(NA, 1), q4 = c(NA, 
    1), q5 = c(NA, 1), q6 = c(NA, 3), q7 = c(NA, 3), q8 = c(NA, 
    3), q9 = c(NA, 2), q10 = c(NA, 2), q11 = c(NA, 3), q12 = c(NA, 
    2), q13 = c(NA, 2), q14 = c(NA, 2), q15 = c(NA, 1), q16 = c(NA, 
    1), q17 = c(NA, 2), q18 = c(NA, 2), q19 = c(NA, 2), q20 = c(NA, 
    2), q21 = c(NA, 3), q22 = c(NA, 3), q23 = c(NA, 2), q24 = c(NA, 
    2), q25 = c(NA, 2), q26 = c(NA, 2), q27 = c(NA, 2), q28 = c(NA, 
    2), q29 = c(NA, 5), q30 = c(NA, 5), q31 = c(NA, 3), q32 = c(NA, 
    2), q33 = c(NA, 1), q34 = c(NA, 1), q35 = c(NA, 5), q36 = c(NA, 
    2), q37 = c(NA, 1), q38 = c(NA, 2), q39 = c(NA, 2), q40 = c(NA, 
    2), q41 = c(NA, 2), q42 = c(NA, 2), q43 = c(NA, 3), q44 = c(NA, 
    1), q45 = c(NA, 2), q46 = c(NA, 3), q47 = c(NA, 3), q48 = c(NA, 
    1), q49 = c(NA, 3), q50 = c(NA, 1), q51 = c(NA, 1), q52 = c(NA, 
    1), q53 = c(NA, 1), abdominal_ct_scan = 0:1, abdominal_mri = c(0L, 
    0L), abdominal_ultrasound = c(1L, 1L), bone_scan = c(1L, 
    1L), bone_x_ray = c(0L, 0L), chest_ct_scan = c(0L, 0L), chest_mri = c(0L, 
    0L), chest_x_ray = c(1L, 1L), left_breast_mammography = c(1L, 
    1L), not_applicable_due_to_prior_procedure = c(0L, 0L), right_breast_mammography = c(0L, 
    0L), ultrasound_left_breast = c(1L, 1L), ultrasound_right_breast = c(0L, 
    0L), ht = c(NA, 147), wt = c(NA, 7.42), eintna_Non_significant_abnormalities = c(0, 
    0), eintna_Not_Done = c(0, 0), eintna_Significant_abnormalities = c(0, 
    0), eintna_Within_Normal_Limits = c(1, 1), class = structure(c(2L, 
    1L), .Label = c("no", "yes"), class = "factor")), .Names = c("lvvalue", 
"lvtest_ECHO", "lvtest_MUGA", "lvtest_Not_Done", "bilirubin", 
"alat", "asat", "alkaline_phosphatase", "creatinine", "age", 
"sex", "lumpectomy", "mastectomy", "other_surg", "quad_seg", 
"resaln", "posaln", "histype_idc", "histype_ilc", "histype_other", 
"ncgr_cba", "ncgr_md", "ncgr_pd", "ncgr_un", "ncgr_wd", "ptsize", 
"tnm_val", "ptsite_Left", "ptsite_Right", "evidis_no", "evidis_yes", 
"fishresNegative", "fishresPositive", "wbc", "platelets", "hemoglobin", 
"neutophils", "penabnAbnormal", "penabnNormal", "penabnNot_Done", 
"q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", 
"q11", "q12", "q13", "q14", "q15", "q16", "q17", "q18", "q19", 
"q20", "q21", "q22", "q23", "q24", "q25", "q26", "q27", "q28", 
"q29", "q30", "q31", "q32", "q33", "q34", "q35", "q36", "q37", 
"q38", "q39", "q40", "q41", "q42", "q43", "q44", "q45", "q46", 
"q47", "q48", "q49", "q50", "q51", "q52", "q53", "abdominal_ct_scan", 
"abdominal_mri", "abdominal_ultrasound", "bone_scan", "bone_x_ray", 
"chest_ct_scan", "chest_mri", "chest_x_ray", "left_breast_mammography", 
"not_applicable_due_to_prior_procedure", "right_breast_mammography", 
"ultrasound_left_breast", "ultrasound_right_breast", "ht", "wt", 
"eintna_Non_significant_abnormalities", "eintna_Not_Done", "eintna_Significant_abnormalities", 
"eintna_Within_Normal_Limits", "class"), row.names = 1:2, class = "data.frame")

我试过的脚本是:

# Draw one partition holding 75% of the rows, keyed on the outcome column
# `class`, then use the selected rows for training and the rest for testing.
splitindex <- createDataPartition(
  test$class,
  p = 0.75,
  list = FALSE,
  times = 1
)
traindf <- test[splitindex, ]
testdf <- test[-splitindex, ]

# Run fscaret feature selection with the "gbm" model.
# NOTE(review): the reported error comes from
# `if (mean(x[i, -i]) > mean(x[-j, j]))`, which looks like the correlation
# filter run when preprocessData = TRUE; NA values in the predictors would
# make that comparison NA. Presumably this needs either missData set to one
# of "delRow" / "delCol" / "meanCol" or preprocessData = FALSE -- confirm
# against the fscaret documentation.
# NOTE(review): `class` is a factor, yet the model is passed through
# Used.funcRegPred (regression); verify whether classPred = TRUE,
# regPred = FALSE and Used.funcClassPred are what is intended here.
myfs <- fscaret(
  traindf,
  testdf,
  myTimeLimit = 40,
  preprocessData = TRUE,
  Used.funcRegPred = "gbm",
  with.labels = TRUE,
  supress.output = FALSE,
  no.cores = 2,
  saveModel = TRUE
)

数据中确实包含很多 NA;之所以选择“gbm”,是因为它本身可以处理缺失值。

“traindf”包含随机选择的数据的 75%。“testdf”包含其余 25%。

任何帮助都感激不尽。

问候,

4

0 回答 0