0

我有以下数据，需要找出该表的最佳聚类数。表中的值只能是 0、0.5 或 1。

library(NbClust)
library(factoextra)
library(pheatmap)

    # Example input: a data.frame with 6 rows (g1-g6) and 37 columns whose
    # values are all 0, 0.5 or 1. Rows g1 and g5 are identical in every column,
    # so the table contains only 5 distinct rows.
    tab=structure(list(`57-B1` = c(1, 0.5, 0.5, 1, 1, 0.5), `57-B3` = c(0.5,
0.5, 0.5, 0, 0.5, 0.5), `57-C1` = c(1, 0.5, 0.5, 0.5, 1, 0.5),
    `57-C5` = c(1, 0.5, 0.5, 1, 1, 1), `57-H2` = c(1, 0.5, 0.5,
    0, 1, 1), `57-H4` = c(0.5, 0.5, 0.5, 0, 0.5, 0.5), `61-1-B1` = c(0.5,
    0.5, 0.5, 0, 0.5, 0.5), `61-1-C2` = c(0.5, 0.5, 0.5, 0, 0.5,
    0.5), `61-1-C5` = c(0.5, 0.5, 0.5, 0, 0.5, 0.5), `61-1-H1` = c(0.5,
    0.5, 0, 0, 0.5, 0.5), `61-1-H3` = c(0.5, 0.5, 0.5, 0, 0.5,
    0.5), `61-1-H5` = c(0.5, 0.5, 0, 0.5, 0.5, 0.5), `62-2_H2` = c(0.5,
    0.5, 0.5, 0, 0.5, 0.5), `62_1_C2` = c(0.5, 0.5, 0, 0.5, 0.5,
    0.5), `62_1_C5` = c(0.5, 0.5, 0.5, 0, 0.5, 0.5), `FL-39-C3` = c(0.5,
    0.5, 0.5, 0, 0.5, 0.5), `FL-41-1-C3` = c(0.5, 0.5, 0.5, 0,
    0.5, 0.5), `FL-57-B1` = c(0.5, 0.5, 0.5, 0, 0.5, 0.5), `FL-57-B2` = c(0.5,
    0.5, 0.5, 0, 0.5, 0.5), `FL-57-C2` = c(0.5, 0.5, 0, 0.5,
    0.5, 0.5), `FL-57-C3` = c(1, 1, 1, 0, 1, 1), `FL-57-C5` = c(1,
    0.5, 0.5, 1, 1, 1), `FL-57-H1` = c(1, 0.5, 0.5, 1, 1, 1),
    `FL-57-H4` = c(0.5, 0.5, 0, 0, 0.5, 0.5), `FL-57-H5` = c(0.5,
    0.5, 0.5, 0, 0.5, 0.5), `FL-61-1-B1` = c(0.5, 0.5, 0.5, 0,
    0.5, 0.5), `FL-61-1-B4` = c(0.5, 0.5, 0.5, 0, 0.5, 0.5),
    `FL-61-1-C2` = c(0.5, 0.5, 0, 0, 0.5, 0.5), `FL-61-1-C4` = c(0.5,
    0.5, 0.5, 0, 0.5, 0.5), `FL-61-1-H3` = c(0.5, 0.5, 0.5, 0,
    0.5, 0.5), `FL-61-1-H4` = c(0.5, 0.5, 0.5, 0, 0.5, 0.5),
    `FL-61-1-H5` = c(0.5, 0.5, 0, 0.5, 0.5, 0.5), `FL-62-1-C3` = c(0.5,
    0.5, 0, 0.5, 0.5, 0.5), `FL-62-2-H2` = c(0.5, 0.5, 0.5, 0,
    0.5, 0.5), `FL-73-H1` = c(0.5, 0.5, 0.5, 0, 0.5, 0.5), P_57_F = c(0.5,
    0.5, 0.5, 0, 0.5, 0.5), P_57_M = c(0.5, 0.5, 0.5, 0, 0.5,
    0.5)), row.names = c("g1", "g2", "g3", "g4", "g5", "g6"), class = "data.frame")

我分别用缩放后（scale）和未缩放的数据进行了尝试：

# Within-cluster sum of squares ("wss") plot on the column-scaled data.
# No k.max is passed, so fviz_nbclust() uses its default upper bound
# (10 per the factoextra documentation), which is more than the 6 rows of tab.
fviz_nbclust(scale(tab), kmeans, method = "wss") 
# Same call on the raw, unscaled values; again with the default k.max.
fviz_nbclust(tab, kmeans, method = "wss") 

我得到这个错误:

Error in FUNcluster(x, i, ...) :
  more cluster centers than distinct data points.

我该如何解决?

非常感谢您的帮助！

4

1 回答 1

0

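我可能找到了解决方案：fviz_nbclust() 默认的 k.max 为 10，而 tab 只有 6 行，kmeans 无法划分出比互不相同的数据点更多的簇，因此会报错。将 k.max 指定为任何低于 nrow(tab) 的数字就足够了，例如 fviz_nbclust(tab, kmeans, method = "wss", k.max = 5)。更严格地说，k.max 不能超过互不相同的行数，即 nrow(unique(tab))。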

于 2021-05-23T22:22:17.767 回答