1

考虑这个简单的例子

library(dplyr)
library(forcats)
library(ggplot2)

# Toy data set: two grouping variables (cat1, cat2) stored as factors, plus a
# numeric value and time for each of the four rows.
# tibble() replaces the deprecated data_frame(); the factors are created
# directly in the constructor instead of in a follow-up mutate().
mydata <- tibble(
  cat1 = factor(c(1, 1, 2, 2)),
  cat2 = factor(c("a", "b", "a", "b")),
  value = c(10, 20, -10, -20),
  time = c(1, 2, 1, 2)
)

> mydata
# A tibble: 4 x 4
  cat1  cat2  value  time
  <fct> <fct> <dbl> <dbl>
1 1     a      10.0  1.00
2 1     b      20.0  2.00
3 2     a     -10.0  1.00
4 2     b     -20.0  2.00

现在,我想创建一个图表,在其中对这两个因子变量做交互。我知道可以在 ggplot2 中使用 interaction()(见下文)。

我的大问题是我不知道如何自动化交互的标签(和着色),这样我就可以避免使用scale_colour_manual.

例如:

# Question's example: colour the points by the interaction of cat1 and cat2,
# with colours and legend labels typed in by hand.
# NOTE: the hand-written `labels` below do not line up with the actual
# interaction levels -- this is the mislabelling the question is about (the
# bright red point is really 1-a, yet the legend calls it "1-b").
ggplot(mydata,
       aes(x = time, y = value, col = interaction(cat1, cat2) )) + 
  geom_point(size=15) + theme(legend.position="bottom")+
  scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
  theme(legend.position="bottom",
        legend.text=element_text(size=12, face = "bold")) +
  scale_colour_manual(name = ""
                      , values=c("red","red4","royalblue","royalblue4")
                      , labels=c("1-b","1-a"
                                 ,"2-a","2-b"))

显示:

在此处输入图像描述

这个图是错误的,因为我在 scale_colour_manual() 中使用了错误的标签。事实上,鲜红色的点是 1-a 而不是 1-b(注意标签只是简单地把两个变量的水平名连接起来)。问题在于,随着因子水平数量的增加,要猜对正确的顺序可能会很棘手。

有没有办法自动化这个标签(更好:标签和着色)?也许使用forcats?也许事先在数据框中将标签创建为字符串?

谢谢!

4

1 回答 1

1

如果 cat1 / cat2 的因子水平数不固定(而且可能远大于 2),我会尝试使用 hsv() 计算合适的颜色,而不是手动指定它们。

这里的颜色备忘单很好地总结了 HSV 颜色模型:

色轮

色调 (h) 本质上是你的彩虹色轮,饱和度 (s) 决定颜色的强度,值 (v) 决定颜色的深浅。每个参数都接受 [0, 1] 范围内的值。

以下是我将如何适应这个用例:

# Add two helper columns to mydata: the combined factor used for the colour
# aesthetic, and the hex colour computed for each row.
mydata2 <- mydata %>%
  mutate(
    # Join the two factors with "-" (instead of interaction()'s default ".")
    # so the level names can be used directly as legend labels.
    interacted.variable = interaction(cat1, cat2, sep = "-"),
    # cat1 selects the hue, spaced evenly around the colour wheel.
    # cat2 drives saturation and value together; both are kept above 0.3
    # because lower settings look too faint / too dark.
    colour = hsv(
      h = as.integer(cat1) / nlevels(cat1),
      s = 0.3 + 0.7 * as.integer(cat2) / nlevels(cat2),
      v = 0.3 + 0.7 * as.integer(cat2) / nlevels(cat2)
    )
  )

# Reduce mydata2 to one row per interaction level, then turn that lookup into
# a named character vector (names = levels, values = hex colours) to pass as
# `values` to scale_colour_manual().
manual.colour <- unique(select(mydata2, interacted.variable, colour))
colour.vector <- manual.colour[["colour"]]
names(colour.vector) <- as.character(manual.colour[["interacted.variable"]])
# The lookup table was only a stepping stone; drop it from the workspace.
rm(manual.colour)

> colour.vector
      1-a       1-b       2-a       2-b 
"#3AA6A6" "#00FFFF" "#A63A3A" "#FF0000" 

使用针对任意数量的因素自动计算的颜色,绘图变得非常简单:

# Plot using the precomputed colours: the named vector supplies the colour of
# every interaction level, and its names fix which legend entries are shown
# and in what order.
ggplot(
  data = mydata2,
  mapping = aes(x = time, y = value, colour = interacted.variable)
) +
  geom_point(size = 15) +
  theme(legend.position = "bottom") +
  scale_colour_manual(
    name = "",
    values = colour.vector,
    breaks = names(colour.vector)
  )

绘图结果

下面是一个因子水平更多的示例(代码相同,只是在颜色标度中额外指定了 guide_legend(byrow = TRUE)):

# Larger illustration: 3 levels of cat1 crossed with 5 levels of cat2, giving
# 15 distinct combinations, each with its own computed colour.
mydata3 <- data.frame(
  cat1 = factor(rep(1:3, times = 5)),
  # cat2 must be an explicit factor: since R 4.0.0 data.frame() no longer
  # converts strings to factors, and the as.integer() / levels() calls below
  # only give meaningful results on a factor.
  cat2 = factor(rep(LETTERS[1:5], each = 3)),
  value = 1:15,
  time = 15:1
) %>%
  # Same colour scheme as for the small example: hue from cat1, saturation
  # and value (both kept above 0.3) from cat2.
  mutate(interacted.variable = interaction(cat1, cat2, sep = "-"),
         colour = hsv(h = as.integer(cat1) / length(levels(cat1)),
                      s = 0.3 + 0.7 * as.integer(cat2) / length(levels(cat2)),
                      v = 0.3 + 0.7 * as.integer(cat2) / length(levels(cat2))))

# Build the named colour vector for mydata3. Rows are sorted by cat1, then
# cat2, before de-duplicating, so the vector (and the legend breaks taken
# from its names) follows that order.
manual.colour <- mydata3 %>%
  arrange(cat1, cat2) %>%
  select(interacted.variable, colour) %>%
  unique()
colour.vector <- manual.colour[["colour"]]
names(colour.vector) <- as.character(manual.colour[["interacted.variable"]])
# The lookup table is no longer needed once the vector exists.
rm(manual.colour)

# Same plot as for the small example, with the legend filled row by row so
# that the 15 entries are laid out across rows rather than down columns.
ggplot(
  data = mydata3,
  mapping = aes(x = time, y = value, colour = interacted.variable)
) +
  geom_point(size = 15) +
  theme(legend.position = "bottom") +
  scale_colour_manual(
    name = "",
    values = colour.vector,
    breaks = names(colour.vector),
    guide = guide_legend(byrow = TRUE)
  )

例子

于 2018-02-23T05:41:24.173 回答