在主成分分析中,我提取prcomp()
散点图的成分结果。我想添加组名的标签,然后在每个组中使用MASS::cov.trob()
. 我创建新的统计数据并重建新的几何图形ggplot2::ggproto()
,以展示每个组的标签。但是,新图具有不合理的图例,因为它应该是点图例而不是字符图例。我已经尝试了多种变体,但它们似乎都不起作用。有任何想法吗?这是我的代码:
# data
data(Cars93, package = "MASS")
car_df <- Cars93[, c(3, 5, 13:15, 17, 19:25)]
car_df <- subset(car_df, Type == "Large" | Type == "Midsize" | Type == "Small")
x1 <- mean(car_df$Price) + 2 * sd(car_df$Price)
x2 <- mean(car_df$Price) - 2 * sd(car_df$Price)
car_df <- subset(car_df, Price > x2 | Price < x1)
car_df <- na.omit(car_df)
# Principal Component Analysis
car.pca <- prcomp(car_df[, -1], scale = T)
car.pca_pre <- cbind(as.data.frame(predict(car.pca)[, 1:2]), car_df[, 1])
colnames(car.pca_pre) <- c("PC1", "PC2", "Type")
head(car.pca_pre)
# create a new stat
library(ggplot2)
StatLabel <- ggproto("StatLabel" ,Stat,
compute_group = function(data, scales) {
library(MASS)
df <- data.frame(data$x,data$y)
center <- cov.trob(df)$center
names(center)<- NULL
center <- t(as.data.frame(center))
center <- as.data.frame(cbind(center))
colnames(center) <- c("x","y")
rownames(center) <- NULL
return(center)
},
required_aes = c("x", "y")
)
stat_label <- function (mapping = NULL, data = NULL, stat = "identity", position = "identity",
..., parse = FALSE, nudge_x = 0, nudge_y = 0, label.padding = unit(0.15,
"lines"), label.r = unit(0.15, "lines"), label.size = 0.1,
na.rm = FALSE, show.legend = NA, inherit.aes = TRUE)
{
if (!missing(nudge_x) || !missing(nudge_y)) {
if (!missing(position)) {
stop("Specify either `position` or `nudge_x`/`nudge_y`",
call. = FALSE)
}
position <- position_nudge(nudge_x, nudge_y)
}
layer(data = data, mapping = mapping, stat = StatLabel, geom = GeomLabel,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(parse = parse, label.padding = label.padding,
label.r = label.r, label.size = label.size, na.rm = na.rm,
...))
}
# plot
ggplot(car.pca_pre, aes(PC1, PC2, color = Type)) + geom_point() +
stat_label(aes(label = Type))