r - 如何在R中计算模糊性能指标和归一化分类熵

Question

我正在使用e1071包运行模糊 C 均值聚类。我想根据以下公式中给出的模糊性能指数（FPI）（模糊程度）和归一化分类熵（NCE）（特定类的无组织程度）来确定最佳聚类数

其中 c 是聚类数，n 是观测数，μ _ik是模糊隶属度，log _a是自然对数。

我正在使用以下代码

library(e1071)
x <- rbind(matrix(rnorm(100,sd=0.3),ncol=2),
         matrix(rnorm(100,mean=1,sd=0.3),ncol=2))
cl <- cmeans(x,2,20,verbose=TRUE,method="cmeans")
cl$membership

我已经能够提取 μ _ik即模糊隶属度。现在，cmeans必须针对不同数量的集群，例如 2 到 6，并且必须计算 FPI 和 NCE 以获得如下图

如何在 R 中实现？

编辑

iris我已经使用以下代码尝试了@nya 为数据集提供的代码

df <- scale(iris[-5])

FPI <- function(cmem){
  c <- ncol(cmem)
  n <- nrow(cmem)
  
  1 - (c / (c - 1)) * (1 - sum(cmem^2) / n)
}

NCE <- function(cmem){
  c <- ncol(cmem)
  n <- nrow(cmem)
  
  (n / (n - c)) * (- sum(cmem * log(cmem)) / n)
}

# prepare variables
cl <- list()
fpi <- nce <- NULL

# cycle through the desired number of clusters
for(i in 2:6){
  cl[[i]] <- cmeans(df, i, 20, method = "cmeans")
  fpi <- c(fpi, FPI(cl[[i]]$membership))
  nce <- c(nce, NCE(cl[[i]]$membership))
}

# add space for the second axis label
par(mar = c(5,4,1,4) + .1)

# plot FPI
plot(2:6, fpi, lty = 2, pch = 18, type = "b", xlab = "Number of clusters", ylab = "FPI")

# plot NCE, manually adding the second axis
par(new = TRUE)
plot(2:6, nce, lty = 1, pch = 15, type = "b", xlab = "", ylab = "", axes = FALSE)
axis(4, at = pretty(range(nce)))
mtext("NCE", side = 4, line = 3)

# add legend
legend("top", legend = c("FPI", "NCE"), pch = c(18,15), lty = c(2,1), horiz = TRUE)

考虑模糊性能指数（FPI）和归一化分类熵（NCE）的最小值来确定最佳聚类数。NCE 一直在增加，而 FPI 则呈下降趋势。理想情况下应该是

score 5 · Accepted Answer

使用可用的方程，我们可以编写自己的函数。在这里，这两个函数使用了您建议的论文中的方程式和作者引用的参考文献之一。

FPI <- function(cmem, method = c("FuzME", "McBrathney", "Rahul")){
    method = match.arg(method)
    C <- ncol(cmem)
    N <- nrow(cmem)

    # Rahul et al. 2019. https://doi.org/10.1080/03650340.2019.1578345
    if(method == "Rahul"){
        res <- 1 - (C / (C - 1)) * (1 - sum(cmem^2) / N)
    }
    # McBrathney & Moore 1985 https://doi.org/10.1016/0168-1923(85)90082-6
    if(method == "McBrathney"){
        F <- sum(cmem^2) / N
        res <- 1 - (C * F - 1) / (F - 1)
    }
    # FuzME https://precision-agriculture.sydney.edu.au/resources/software/
    # MATLAB code file fvalidity.m, downloaded on 11 Nov, 2021 
    if(method == "FuzME"){
        F <- sum(cmem^2) / N
        res <- 1 - (C * F - 1) / (C - 1)
    }
    return(res)
}

NCE <- function(cmem, method = c("FuzME", "McBrathney", "Rahul")){
    method = match.arg(method)
    C <- ncol(cmem)
    N <- nrow(cmem)

    if(method == "Rahul"){
        res <- (N / (N - C)) * (- sum(cmem * log(cmem)) / N)
    }
    if(method %in% c("FuzME", "McBrathney")){
        H <- -1 / N * sum(cmem * log(cmem)) 
        res <- H / log(C)
    }
    return(res)
}

cmeans然后使用这些从数据集中的函数的隶属度计算索引iris。

# prepare variables
cl <- list()
fpi <- nce <- NULL

# cycle through the desired number of clusters
for(i in 2:6){
    cl[[i]] <- e1071::cmeans(iris[, -5], i, 20, method = "cmeans")
    fpi <- c(fpi, FPI(cl[[i]]$membership, method = "M"))
    nce <- c(nce, NCE(cl[[i]]$membership, method = "M"))
}

最后，在一个绘图中绘制两个不同的轴。

# add space for the second axis label
par(mar = c(5,4,1,4) + .1)

# plot FPI
plot(2:6, fpi, lty = 2, pch = 18, type = "b", xlab = "Number of clusters", ylab = "FPI")

# plot NCE, manually adding the second axis
par(new = TRUE)
plot(2:6, nce, lty = 1, pch = 15, type = "b", xlab = "", ylab = "", axes = FALSE)
axis(4, at = pretty(range(nce)))
mtext("NCE", side = 4, line = 3)

# add legend
legend("top", legend = c("FPI", "NCE"), pch = c(18,15), lty = c(2,1), horiz = TRUE)

EDIT1：根据来自两个不同出版物的可选方程更新函数并计算iris数据集上的示例。

EDIT2：添加了此处可用的 FuzME MATLAB 代码中指定的 FPI 和 NCE 计算的代码。

score 0 · Accepted Answer

希望这可以帮助

library(dplyr)
library(ggplot2)

f <- function(cl) {
  C <- length(cl$size)
  N <- sum(cl$size)
  mu <- cl$membership
  fpi <- 1 - C / (C - 1) * (1 - sum((mu)^2) / N)
  nce <- N / (N - C) * (-sum(log(mu) * mu) / N)
  c(FPI = fpi, NCE = nce)
}

data.frame(t(rbind(
  K = 2:6,
  sapply(
    K,
    function(k) f(cmeans(x, k, 20, verbose = TRUE, method = "cmeans"))
  )
))) %>%
  pivot_longer(cols = FPI:NCE, names_to = "Index") %>%
  ggplot(aes(x = K, y = value, group = Index)) +
  geom_line(aes(linetype = Index, color = Index)) +
  geom_point() +
  scale_y_continuous(
    name = "FPI",
    sec.axis = sec_axis(~., name = "NCE")
  ) +
  theme(legend.position = "top")

r - 如何在R中计算模糊性能指标和归一化分类熵

2 回答 2

Related

Reference