1

在主成分分析中,我提取 prcomp() 的成分结果来绘制散点图。我想添加组名标签,其中每个组的中心使用 MASS::cov.trob() 计算。我创建了新的 stat,并通过 ggplot2::ggproto() 重建了新的 geom,以展示每个组的标签。但是,新图的图例不合理:它应该是点图例,而不是字符图例。我已经尝试了多种变体,但它们似乎都不起作用。有任何想法吗?这是我的代码:

# Data ----
# Select a subset of Cars93 columns; the first selected column is `Type`,
# which is later used as the grouping variable, and `Price` is among the rest.
data(Cars93, package = "MASS")
car_df <- Cars93[, c(3, 5, 13:15, 17, 19:25)]
car_df <- subset(car_df, Type %in% c("Large", "Midsize", "Small"))

# Keep only rows whose Price lies strictly within mean +/- 2 sd.
# Both bounds must hold at once, hence `&`: with `|` every finite price
# satisfies at least one of the two conditions and nothing is filtered out.
x1 <- mean(car_df$Price) + 2 * sd(car_df$Price)
x2 <- mean(car_df$Price) - 2 * sd(car_df$Price)
car_df <- subset(car_df, Price > x2 & Price < x1)
car_df <- na.omit(car_df)

# Principal Component Analysis ----
# Drop the `Type` column and standardise the remaining variables. The argument
# is spelled `scale.` in full (no partial matching) and uses TRUE rather than
# the reassignable shorthand T.
car.pca <- prcomp(car_df[, -1], scale. = TRUE)

# First two principal-component scores alongside the car type.
car.pca_pre <- cbind(as.data.frame(predict(car.pca)[, 1:2]), car_df[, 1])
colnames(car.pca_pre) <- c("PC1", "PC2", "Type")
head(car.pca_pre)

# create a new stat
library(ggplot2)
# Custom ggplot2 stat that reduces each group to a single point: the centre
# returned by MASS::cov.trob() for that group's (x, y) values.
StatLabel <- ggproto("StatLabel", Stat,
  # Both positional aesthetics must be mapped for the centre to be computed.
  required_aes = c("x", "y"),

  # Called once per group.
  #   data   - the group's rows; only the `x` and `y` columns are read here.
  #   scales - supplied by ggplot2; not used.
  # Returns a one-row data frame with columns `x` and `y`.
  compute_group = function(data, scales) {
    # Namespace-qualified call instead of library(MASS): attaching a package
    # from inside a stat changes the caller's search path as a side effect.
    center <- MASS::cov.trob(data.frame(data$x, data$y))$center
    # `[[` drops the element names so the result has clean x/y columns.
    data.frame(x = center[[1]], y = center[[2]])
  }
)

# Layer constructor that pairs StatLabel with GeomLabel.
#
# Arguments are passed on to ggplot2::layer(); `parse`, `label.padding`,
# `label.r`, `label.size`, `na.rm` and anything in `...` travel as layer
# params. `nudge_x` / `nudge_y` are shorthand for a position_nudge() and may
# not be combined with an explicit `position`.
# Note: `stat` is accepted in the signature but the layer always uses
# StatLabel.
stat_label <- function(mapping = NULL, data = NULL, stat = "identity",
                       position = "identity", ..., parse = FALSE,
                       nudge_x = 0, nudge_y = 0,
                       label.padding = unit(0.15, "lines"),
                       label.r = unit(0.15, "lines"), label.size = 0.1,
                       na.rm = FALSE, show.legend = NA, inherit.aes = TRUE) {
  # TRUE when the caller supplied at least one nudge argument explicitly.
  nudge_given <- !(missing(nudge_x) && missing(nudge_y))

  if (nudge_given && !missing(position)) {
    stop("Specify either `position` or `nudge_x`/`nudge_y`", call. = FALSE)
  }
  if (nudge_given) {
    position <- position_nudge(nudge_x, nudge_y)
  }

  layer_params <- list(
    parse = parse,
    label.padding = label.padding,
    label.r = label.r,
    label.size = label.size,
    na.rm = na.rm,
    ...
  )

  layer(
    data = data,
    mapping = mapping,
    stat = StatLabel,
    geom = GeomLabel,
    position = position,
    show.legend = show.legend,
    inherit.aes = inherit.aes,
    params = layer_params
  )
}

# Plot ----
# Scatter of the first two principal components coloured by Type, plus the
# stat_label() layer labelled by Type.
ggplot(data = car.pca_pre, mapping = aes(x = PC1, y = PC2, color = Type)) +
  geom_point() +
  stat_label(mapping = aes(label = Type))

在此处输入图像描述

4

1 回答 1

1

我认为在图例中显示新统计层的点并不自然,因为它并没有绘制任何点。就目前而言,当点和文本共用一个组合图例时,ggplot 似乎会优先显示文本图例。最简单的解决方案是让标签统计层默认不显示图例。

您可以将函数中 show.legend 的默认值改为 FALSE,这样您的绘图就会显示点图例。

# Build a layer that draws labels with GeomLabel at the positions computed by
# StatLabel.
#
# `show.legend` defaults to FALSE here, so this layer contributes no legend
# key unless the caller asks for one.
# `nudge_x` / `nudge_y` are converted to a position_nudge(); supplying them
# together with `position` is an error.
# `stat` is part of the signature but is not consulted: StatLabel is always
# used.
stat_label <- function(mapping = NULL,
                       data = NULL,
                       stat = "identity",
                       position = "identity",
                       ...,
                       parse = FALSE,
                       nudge_x = 0, nudge_y = 0,
                       label.padding = unit(0.15, "lines"),
                       label.r = unit(0.15, "lines"),
                       label.size = 0.1,
                       na.rm = FALSE,
                       show.legend = FALSE, # default is off for this layer
                       inherit.aes = TRUE) {
  wants_nudge <- !missing(nudge_x) || !missing(nudge_y)

  if (wants_nudge) {
    # An explicit position and a nudge are mutually exclusive.
    if (!missing(position)) {
      stop("Specify either `position` or `nudge_x`/`nudge_y`", call. = FALSE)
    }
    position <- position_nudge(nudge_x, nudge_y)
  }

  layer(
    geom = GeomLabel,
    stat = StatLabel,
    data = data,
    mapping = mapping,
    position = position,
    params = list(
      parse = parse,
      label.padding = label.padding,
      label.r = label.r,
      label.size = label.size,
      na.rm = na.rm,
      ...
    ),
    inherit.aes = inherit.aes,
    show.legend = show.legend
  )
}

# Plot ----
# Same figure as before, now using the stat_label() whose show.legend
# default is FALSE.
ggplot(car.pca_pre, aes(x = PC1, y = PC2, color = Type)) +
  geom_point() +
  stat_label(aes(label = Type))

在此处输入图像描述

于 2017-03-02T09:42:13.820 回答