我正在使用 clusplot 绘制经 kmeans 聚类后的主成分(PC),但棘手的是,我希望椭圆按另一个变量(而不是用于数据点的 f$cluster)进行颜色编码。下面是我使用的命令,但它没有按预期工作:
# Draw a clusplot of the first three columns of `f`, grouping the points by
# the cluster assignment stored in f$cluster.
clusplot(
  f[, 1:3],
  f$cluster,
  cex = 0.8,
  color = TRUE,
  shade = TRUE,
  labels = 5,
  # NOTE(review): the cluster package docs appear to describe col.clus, when
  # color = TRUE, as a vector of 4 density colours rather than one colour per
  # ellipse, so passing unique(f$groupnum) may not colour by group -- confirm.
  col.clus = unique(f$groupnum),
  lines = 0,
  main = "testing"
)
我的 f 是一个有 33 列的数据框:PC1 到 PC30,然后是 cluster、groupnum 和 groupttl。$cluster 列保存聚类结果(簇编号),但我想用 $groupnum 对椭圆进行颜色编码,并尽可能用 $groupttl 来标记椭圆。
重要的一点是:$cluster 和 $groupnum 的组数可以不同,例如我只有 5 个簇,但有 6 个不同的 groupnum 值要用来为椭圆着色。因此,通常情况下,要着色的椭圆会多于实际的簇数。
谢谢保罗和舒亚。这是我的数据框
# dput() dump of the example data frame: 50 rows with the columns PC1, PC2,
# PC3, cluster, groupnum and groupttl. The expression is not assigned to a
# name here; wrap it as `f <- structure(...)` to recreate `f` for the
# clusplot() call.
structure(list(PC1 = c(8.2545, 1.0159, -1.5319, -1.7703, 1.6903,
-1.7378, -2.1898, -2.8501, 1.0669, -0.8577, 6.3985, 2.6495, 2.6374,
0.2395, 8.6415, -0.0638, -2.739, -0.7897, 0.3137, -3.0958, 2.9457,
-4.1256, -2.6289, -2.9377, -0.6272, -2.7296, 1.2871, -1.6917,
-0.1118, 11.1845, 2.6486, -3.4377, -0.5581, -3.217, -2.5425,
-1.419, -0.9338, -0.8993, -3.119, -3.5188, 1.6804, -0.4142, -2.3187,
-0.1962, -1.3428, 5.1539, 10.3632, -0.9815, -2.7796, 0.0708),
PC2 = c(-1.6021, -5.4785, 3.3933, 3.6795, -3.0405, 5.3614,
-2.895, 2.2553, -2.648, -0.451, -1.3402, -2.6696, -4.3236,
0.0659, -0.4115, 4.0168, 0.034, -0.2273, -0.5867, 0.8339,
-0.2328, 1.5119, 5.183, -0.7078, 0.5813, 2.8371, 0.8223,
-1.2817, 0.0378, -3.297, -1.0233, 0.5048, -1.9093, -5.5851,
-0.8716, -2.135, -4.2768, 1.567, -0.1263, 2.4107, -1.3151,
1.6173, -0.3908, 3.7365, -1.3812, -2.1328, -0.05, 0.849,
1.9369, -4.7095), PC3 = c(1.4114, -2.0719, -1.927, -0.417,
-2.2733, -1.8481, -2.807, 0.8132, -2.6583, -0.5894, 1.4173,
2.9953, 0.2831, -1.0971, 2.7126, -3.8635, -0.5739, 2.5493,
1.3207, 2.3459, 4.5259, 1.6239, -0.1763, 1.929, 1.5237, 1.7709,
2.1231, -1.5679, 2.9978, -1.0623, 6.3311, 3.2371, -0.0466,
-3.2293, 0.3979, -3.4121, -1.6269, 0.8722, -0.2534, 2.3849,
-1.6068, -0.8486, 0.9351, -1.8844, 2.8963, 0.4948, 1.3549,
0.522, 2.3628, -2.2726), cluster = c(5L, 2L, 1L, 1L, 2L,
1L, 2L, 4L, 2L, 3L, 5L, 3L, 2L, 3L, 5L, 1L, 4L, 3L, 3L, 4L,
3L, 4L, 1L, 4L, 3L, 4L, 3L, 2L, 3L, 5L, 3L, 4L, 2L, 2L, 4L,
2L, 2L, 3L, 4L, 4L, 2L, 1L, 4L, 1L, 3L, 5L, 5L, 3L, 4L, 2L
), groupnum = c(8L, 9L, 2L, 2L, 4L, 2L, 4L, 10L, 4L, 6L,
8L, 8L, 9L, 2L, 8L, 2L, 9L, 6L, 6L, 10L, 8L, 10L, 10L, 10L,
2L, 10L, 6L, 4L, 6L, 8L, 8L, 6L, 9L, 9L, 10L, 4L, 9L, 6L,
9L, 10L, 4L, 2L, 9L, 2L, 2L, 4L, 8L, 6L, 6L, 9L), groupttl = c("MN",
"GC", "GMS", "GMS", "MP", "GMS", "MP", "MS", "MP", "BC",
"MN", "MN", "GC", "GMS", "MN", "GMS", "GC", "BC", "BC", "MS",
"MN", "MS", "MS", "MS", "GMS", "MS", "BC", "MP", "BC", "MN",
"MN", "BC", "GC", "GC", "MS", "MP", "GC", "BC", "GC", "MS",
"MP", "GMS", "GC", "GMS", "GMS", "MP", "MN", "BC", "BC",
"GC")), .Names = c("PC1", "PC2", "PC3", "cluster", "groupnum",
"groupttl"), row.names = c(NA, 50L), class = "data.frame")
我期待在这方面的一些帮助!
谢谢 SP