2

我试图在 R 中拟合高斯分布和均匀分布的混合物,但成功有限。

我依赖于flexmix提供一个 EM 框架,并使用 package.json 中匹配矩估计器的略微修改版本来拟合fitdistrplus组件。(适应只是用加权版本替换样本均值和方差。)

因为我flexmix对代码(如下)不是很熟悉,所以这是一个 hack。

看起来均匀分布的权重太小了。

非常感谢任何有关如何改善合身性的帮助。

首先是例子

生成玩具数据

a <- rnorm(10000)
b <- runif(10000, min=1, max=5)
c <- c(a,b) + 10
testdata <- data.frame(c=c)

样本数据

理想的合身

comp1 <- 0.5*dnorm(testdata$c, mean=10)
comp2 <- 0.5*dunif(testdata$c, min=11, max=15)

理想的合身

合身

fit <- flexmix(c~1, data=testdata, k=2,
               model=FLXMCnormplusunifw(),
               cluster=as.integer(testdata$c < 12)+1)

合身

功能

## adapted from the 'fitdistrplus' package
fitnorm_w <- function(data, weights) {
  library("Hmisc")
  n <- length(data)
  m <- weighted.mean(data, weights)
  v <- wtd.var(data, weights, normwt = TRUE)

  list(mean=m, sd=sqrt(v))
}
## adapted from the 'fitdistrplus' package
fitunif_w <- function(data, weights) {
  library("Hmisc")
  n <- length(data)
  m <- weighted.mean(data, weights)
  v <- wtd.var(data, weights, normwt = TRUE)

  min1 <- m - sqrt(3*v)
  max1 <- m + sqrt(3*v)

  list(min=min1,
       max=max1)
}

## generate needed S4 class for flexmix -- "norm"
FLXMCnorm1w <- function (formula = . ~ .)
{
  z <- new("FLXMC", weighted = TRUE, formula = formula, dist = "norm",
           name = "model-based univariate norm clustering")
  z@defineComponent <- expression({
    logLik <- function(x, y) dnorm(y, mean = mean, sd = sd,
                                    log = TRUE)
    predict <- function(x, ...) matrix(mean, nrow = nrow(x),
                                       ncol = 1, byrow = TRUE)
    new("FLXcomponent",
        parameters = list(mean = as.vector(mean), sd = as.vector(sd)),
        df = df, logLik = logLik, predict = predict)
  })
  z@fit <- function(x, y, w, ...) {
    ##browser()
    para <- fitnorm_w(as.vector(y), as.vector(w))
    para$df <- 2
    with(para, eval(z@defineComponent))
  }
  z
}

## generate needed S4 class for flexmix -- "unif"
FLXMCunif1w <- function (formula = . ~ .)
{
  z <- new("FLXMC", weighted = TRUE, formula = formula, dist = "unif",
           name = "model-based univariate uniform clustering")
  z@defineComponent <- expression({
    logLik <- function(x, y) dunif(y, min = min, max = max,
                                   log = TRUE)
    predict <- function(x, ...) matrix(runif(nrow(x), min=min, max=max),
                                       nrow = nrow(x), ncol = 1, byrow = TRUE)
    new("FLXcomponent",
        parameters = list(min = as.vector(min), max = as.vector(max)),
        df = df, logLik = logLik, predict = predict)
  })
  z@fit <- function(x, y, w, ...) {
    ##browser()
    para <- fitunif_w(as.vector(y), as.vector(w))
    para$df <- 2
    with(para, eval(z@defineComponent))
  }
  z
}


## ---------------------------------- ##
## the norm+uniform flexmix class     ##
## ---------------------------------- ##

setClass("FLXMCnormplusunifw", contains = "FLXMC")
##
FLXMCnormplusunifw <- function(formula = . ~ ., ...)
{
  new("FLXMCnormplusunifw", FLXMCnorm1w(formula, ...),
      name = paste("FLXMCnormplusunifw" , sep=":"))
}
##
setMethod("FLXremoveComponent", signature(model = "FLXMCnormplusunifw"),
          function(model , nok, ...)
          {
            if (1 %in% nok) as(model , "FLXMC") else model
          })
##
setMethod("FLXmstep", signature(model = "FLXMCnormplusunifw"),
          function(model , weights , ...)
          {
            ##browser()
            comp.1 <- FLXMCunif1w()
            c(list(comp.1@fit(model@x, model@y, weights[, -1,drop=FALSE])),
              FLXmstep(as(model , "FLXMC"), weights[, 1, drop=FALSE]))
          })
4

0 回答 0