0

我正在使用 clusplot 绘制经 kmeans 聚类后的主成分(PC),但棘手的是,我希望椭圆按另一个变量(而不是用于数据点的 f$cluster)进行颜色编码。下面是我使用的命令,但其中有些地方不起作用!

    # Draw the cluster plot for the first three columns of f, grouped by
    # f$cluster; col.clus receives the distinct values of f$groupnum.
    clusplot(
      f[, 1:3], f$cluster,
      main = "testing",
      color = TRUE, shade = TRUE,
      col.clus = unique(f$groupnum),
      labels = 5, lines = 0,
      cex = 0.8
    )

我的 f 是一个有 33 列的数据框:PC1 到 PC30,然后是 cluster、groupnum 和 groupttl。$cluster 列保存聚类得到的簇编号,但我想用 $groupnum 进行颜色编码,并可能用 $groupttl 来标注椭圆。

需要注意的一点是:$cluster 和 $groupnum 的分组数可以不同,也就是说,我只有 5 个簇,但有 6 个唯一的 groupnum 值要用来为椭圆着色。因此,通常情况下,要着色的椭圆会多于实际的簇数。

谢谢保罗和舒亚。这是我的数据框:


structure(list(PC1 = c(8.2545, 1.0159, -1.5319, -1.7703, 1.6903, 
-1.7378, -2.1898, -2.8501, 1.0669, -0.8577, 6.3985, 2.6495, 2.6374, 
0.2395, 8.6415, -0.0638, -2.739, -0.7897, 0.3137, -3.0958, 2.9457, 
-4.1256, -2.6289, -2.9377, -0.6272, -2.7296, 1.2871, -1.6917, 
-0.1118, 11.1845, 2.6486, -3.4377, -0.5581, -3.217, -2.5425, 
-1.419, -0.9338, -0.8993, -3.119, -3.5188, 1.6804, -0.4142, -2.3187, 
-0.1962, -1.3428, 5.1539, 10.3632, -0.9815, -2.7796, 0.0708), 
    PC2 = c(-1.6021, -5.4785, 3.3933, 3.6795, -3.0405, 5.3614, 
    -2.895, 2.2553, -2.648, -0.451, -1.3402, -2.6696, -4.3236, 
    0.0659, -0.4115, 4.0168, 0.034, -0.2273, -0.5867, 0.8339, 
    -0.2328, 1.5119, 5.183, -0.7078, 0.5813, 2.8371, 0.8223, 
    -1.2817, 0.0378, -3.297, -1.0233, 0.5048, -1.9093, -5.5851, 
    -0.8716, -2.135, -4.2768, 1.567, -0.1263, 2.4107, -1.3151, 
    1.6173, -0.3908, 3.7365, -1.3812, -2.1328, -0.05, 0.849, 
    1.9369, -4.7095), PC3 = c(1.4114, -2.0719, -1.927, -0.417, 
    -2.2733, -1.8481, -2.807, 0.8132, -2.6583, -0.5894, 1.4173, 
    2.9953, 0.2831, -1.0971, 2.7126, -3.8635, -0.5739, 2.5493, 
    1.3207, 2.3459, 4.5259, 1.6239, -0.1763, 1.929, 1.5237, 1.7709, 
    2.1231, -1.5679, 2.9978, -1.0623, 6.3311, 3.2371, -0.0466, 
    -3.2293, 0.3979, -3.4121, -1.6269, 0.8722, -0.2534, 2.3849, 
    -1.6068, -0.8486, 0.9351, -1.8844, 2.8963, 0.4948, 1.3549, 
    0.522, 2.3628, -2.2726), cluster = c(5L, 2L, 1L, 1L, 2L, 
    1L, 2L, 4L, 2L, 3L, 5L, 3L, 2L, 3L, 5L, 1L, 4L, 3L, 3L, 4L, 
    3L, 4L, 1L, 4L, 3L, 4L, 3L, 2L, 3L, 5L, 3L, 4L, 2L, 2L, 4L, 
    2L, 2L, 3L, 4L, 4L, 2L, 1L, 4L, 1L, 3L, 5L, 5L, 3L, 4L, 2L
    ), groupnum = c(8L, 9L, 2L, 2L, 4L, 2L, 4L, 10L, 4L, 6L, 
    8L, 8L, 9L, 2L, 8L, 2L, 9L, 6L, 6L, 10L, 8L, 10L, 10L, 10L, 
    2L, 10L, 6L, 4L, 6L, 8L, 8L, 6L, 9L, 9L, 10L, 4L, 9L, 6L, 
    9L, 10L, 4L, 2L, 9L, 2L, 2L, 4L, 8L, 6L, 6L, 9L), groupttl = c("MN", 
    "GC", "GMS", "GMS", "MP", "GMS", "MP", "MS", "MP", "BC", 
    "MN", "MN", "GC", "GMS", "MN", "GMS", "GC", "BC", "BC", "MS", 
    "MN", "MS", "MS", "MS", "GMS", "MS", "BC", "MP", "BC", "MN", 
    "MN", "BC", "GC", "GC", "MS", "MP", "GC", "BC", "GC", "MS", 
    "MP", "GMS", "GC", "GMS", "GMS", "MP", "MN", "BC", "BC", 
    "GC")), .Names = c("PC1", "PC2", "PC3", "cluster", "groupnum", 
"groupttl"), row.names = c(NA, 50L), class = "data.frame")

期待能在这方面得到一些帮助!

谢谢 SP

4

0 回答 0