1

我的理解是,对于 Adaboost M1,损失函数 mean(-y*F) 总是严格递减的,但以下代码并非如此。任何人都可以帮忙吗?

我遵循 Hastie ESL-II 第 10.1 章的示例。底部的损失向量应该严格减少,但事实并非如此。

非常感谢您的帮助。

## Data using example given in T. Hastie, ESL, chapter 10.1
dta <- matrix(rnorm(20000), 2000, 10)
pred <- apply(dta, 1, function(x) sum(x^2))
y <- (pred > qchisq(0.5, 10)) * 2 - 1

## fit y with a two nodes classificaiton tree in x
## randomly sample 19 spliting points and choose the best one 
stump <- function(y, x, w) {

    ## randomly sample 19 splitting points
    ss <- quantile(x, probs=seq(0.05, 0.95, by=0.05) + runif(19) * 0.025) 

    ## compute the loss for each splitting point
    losses <- numeric(0)
    preds <- list()
    cnt <- 1
    for (s in ss) {
        inx <- x < s

        G1 <- rep(-1, length(y))
        G1[inx] <- 1
        losses[cnt] <- sum(as.integer(y != G1) * w)
        preds[[cnt]] <- G1

        G2 <- -G1
        losses[cnt+1] <- sum(as.integer(y != G2) * w)
        preds[[cnt+1]] <- G2

        cnt <- cnt+2
    }
    i <- which(losses == min(losses))[1]

    preds[[i]]
}

## Start the Adaboost M1 algorithm
w <- rep(1/2000, 2000)
F <- rep(0, 2000)
losses <- rep(NA, 400)
mrates <- rep(NA, 400)
m <- 0

while (m <= 400) {
    m <- m + 1

    G <- stump(y, dta[, sample(1:10, 1)], w)
    err <- sum(w[G != y]) / sum(w)

    w <- w * exp(alpha * (y != G))
    w <- w / sum(w)

    F <- F + alpha * G

    losses[m] <- mean(exp(-y*F))
    mrates[m] <- mean(y*F < 0)
    ##cat(sum(exp(-y*F)), "\n")
}

table((F > 0)*2 - 1, y)

par(mfrow=c(2,1))
plot(losses, type="l")
plot(mrates, type="l")
4

0 回答 0