2

我正在尝试在https://archive.ics.uci.edu/ml/datasets/ILPD+(Indian+Liver+Patient+Dataset)上的 UCI 存储库中提供的印度患者肝病分类数据集上实施禁忌搜索,但面临问题。以下是我使用的代码

NF <- 10
NTR <- 193
NTE <- 193
library(class) 
library(e1071)
library(caret)
library(party)
library(nnet)
ILPD <- read.csv("C:/Users/Dell/Desktop/Codes and Datasets/ILPD.csv")
nrow(ILPD)  
set.seed(9850)
gp<-runif(nrow(ILPD))
ILPD<-ILPD[order(gp),]
idx <- createDataPartition(y = ILPD$Class, p = 0.7, list = FALSE)
train<-ILPD[idx,]
test<-ILPD[-idx,]
ver<-test[,11]
evaluate <- function(th){ 
if (sum(th) == 0)return(0)             
model <- svm(train[ ,th==1], train[,11] , gamma = 0.1, kernel ="sigmoid", na.action = na.omit)
pred <- predict(model, test[ ,th==1])
csRate <- sum(pred == ver)/NTE 
penalty <- (NF - sum(th))/NF 
return(csRate + penalty)
}  
library(tabuSearch)
res <- tabuSearch(size = NF, iters = 2, objFunc = evaluate, config =     matrix(1,1,NF), listSize = 5, nRestarts = 4) 
plot(res)
plot(res, "tracePlot")
summary(res, verbose = TRUE)

错误:

Error in if (any(co)) { : missing value where TRUE/FALSE needed
In addition: Warning message:
In FUN(newX[, i], ...) : NAs introduced by coercion
Called from: svm.default(train[, th == 1], train[, 11], gamma = 0.1, kernel = "sigmoid", na.action = na.omit)

部分数据

structure(list(age = c(55L, 48L, 14L, 17L, 40L, 37L), gender = c(0L, 
0L, 0L, 0L, 1L, 0L), TB = c(0.9, 2.4, 0.9, 0.9, 0.9, 0.7), DB = c(0.2, 
1.1, 0.3, 0.2, 0.3, 0.2), Alkphos = c(116L, 554L, 310L, 224L, 
293L, 235L), SGPT = c(36L, 141L, 21L, 36L, 232L, 96L), sgot = c(16L, 
73L, 16L, 45L, 245L, 54L), TP = c(6.2, 7.5, 8.1, 6.9, 6.8, 9.5
), ALB = c(3.2, 3.6, 4.2, 4.2, 3.1, 4.9), AG = c(1, 0.9, 1, 1.55, 
0.8, 1), Class = structure(c(2L, 1L, 2L, 1L, 1L, 1L), .Label = c("One", 
"Two"), class = "factor")), .Names = c("age", "gender", "TB", 
"DB", "Alkphos", "SGPT", "sgot", "TP", "ALB", "AG", "Class"), row.names =    c(216L, 
405L, 316L, 103L, 20L, 268L), class = "data.frame")

如果有人可以帮助我

4

1 回答 1

2

I wanted to see how tabu worked anyway so seemed a good place to start.

Basically you need to test your code better, evaluate just did not work. It is easy to test by hand by creating values of th and then calling evaluate on them.

Also use high level comments to organize your code and keep track of what you are doing, especially when posting to SO for help so as to save us time figuring out what you intend.

Not sure if these results are good, the amount of data is so minimal it is hard to tell.

Anyway here is the changed code:

NF <- 10
NTR <- 193
NTE <- 193
library(class)
library(e1071)
library(caret)
library(party)
library(nnet)

ILPD1 <- structure(
 list(
    age = c(55L,48L,14L,17L,40L,37L),
    gender = c(0L,0L,0L,0L,1L,0L),
    TB = c(0.9,2.4,0.9,0.9,0.9,0.7),
    DB = c(0.2,1.1,0.3,0.2,0.3,0.2),
    Alkphos = c(116L,554L,310L,224L,293L,235L),
    SGPT = c(36L,141L,21L,36L,232L,96L),
    sgot = c(16L,73L,16L,45L,245L,54L),
    TP = c(6.2,7.5,8.1,6.9,6.8,9.5),
    ALB = c(3.2,3.6,4.2,4.2,3.1,4.9),
    AG = c(1,0.9,1,1.55,0.8,1),
    Class = structure(c(2L,1L,2L,1L,1L,1L),
    .Label = c("One","Two"),
    class = "factor")
   ),
   .Names = c("age","gender","TB","DB","Alkphos",
               "SGPT","sgot","TP","ALB","AG","Class"),
   row.names = c(216L,405L,316L,103L,20L,268L),
   class = "data.frame"
)

ILPD <- ILPD1
#ILPD <- read.csv("ILPD.csv")
nrow(ILPD)

set.seed(9850)

# setup test and training data
gp <- runif(nrow(ILPD))
ILPD <- ILPD[order(gp),]
idx <- createDataPartition(y = ILPD$Class,p = 0.7,list = FALSE)
train <- ILPD[idx,]
test <- ILPD[ - idx,]
ver <- test[,11]

evaluate <- function(th) {
    # evaluate the tabu for a value of th
    # tabuSearch will use this function to evaluate points in its search space
    #

    # if everything is turned off just return zero as we are not interested
    if(sum(th) == 0)  return(0)

    # we just want to train our svm on the columns for which th==1
    svmtrn <- train[,th==1]

    # but we need to have the Class varible as our label
    if (is.null(trn$Class)) return(0)

    # Train up an svm now
    #  Note that the first argument is the forumula we are training
    model <- svm(Class~.,svmtrn,gamma = 0.1,kernel = "sigmoid",na.action = na.omit)

    pred <- predict(model,test)

    # now evaluate how well our prediction worked
    csRate <- sum(pred == ver) / NTE
    penalty <- (NF - sum(th)) / NF
    return(csRate + penalty)
}

library(tabuSearch)
evaluate(matrix(1,1,NF))
res <- tabuSearch(size = NF,iters = 2,objFunc = evaluate,
                  config = matrix(1,1,NF),listSize = 5,nRestarts = 4)
plot(res)
plot(res,"tracePlot")
summary(res,verbose = TRUE)

Here are the output results:

[1] 6
[1] 0.005181347
Tabu Settings
  Type                                       = binary configuration
  No of algorithm repeats                    = 1
  No of iterations at each prelim search     = 2
  Total no of iterations                     = 12
  No of unique best configurations           = 8
  Tabu list size                             = 5
  Configuration length                       = 10
  No of neighbours visited at each iteration = 10
Results:
  Highest value of objective fn    = 0.70518
  Occurs # of times                = 1
  Optimum number of variables      = 3
Optimum configuration: 
 [1] 1 0 0 0 0 1 0 0 0 1

And here is your plot:

enter image description here

于 2016-05-25T12:21:56.747 回答