1

我有以下数据框:

library(tidyverse)
library(directlabels)
# Example data: four UMAP clusters, each measured at six time points
# (24 rows in total). Clusters 7 and 13 share the same cell type.
dat <- data.frame(
  # The same six time points repeat for every cluster.
  time.course = rep(
    c("CONTROL", "DAY03", "DAY06", "DAY09", "DAY12", "DAY15"),
    times = 4L
  ),
  log_delta = c(
    # cluster 1
    0, 0.620163956872191, 0.97251217133899, 0.788819459139427,
    0.412543422847407, 0.401621905837411,
    # cluster 5
    0, -0.168711062429047, -0.973481367557294, -1.46433243027353,
    -1.34771037206345, -1.77709667157235,
    # cluster 7
    0, -0.187344700204557, -0.254280909246003, -0.335330756378048,
    -0.655121382977672, -1.1733031812697,
    # cluster 13
    0, -0.0160729795971869, -0.628563089917479, -1.43060414378064,
    -1.466051599194, -2.57510172892555
  ),
  # Factor with levels "1".."15"; only clusters 1, 5, 7 and 13 occur.
  `UMAP cluster` = factor(
    rep(c(1L, 5L, 7L, 13L), each = 6L),
    levels = 1:15
  ),
  # Factor carrying all 14 cell-type levels; only three occur in the rows.
  cell_name = factor(
    rep(c("Macrophage", "Fibroblast", "Myofibroblast"),
        times = c(6L, 6L, 12L)),
    levels = c(
      "Macrophage", "Enteroendocrine", "Endothelial", "Lymphatic",
      "Fibroblast", "T cell", "Myofibroblast",
      "Absorbtice & secrectory cell", "Plasmacytoid DC", "Neutrophil",
      "Plasma cell", "Cajal intestinal cell", "Glial cell",
      "Germinal center B cell"
    )
  ),
  check.names = FALSE,       # keep the space in "UMAP cluster"
  stringsAsFactors = FALSE   # time.course must stay character
)
# Mark the data frame as a tibble, matching the original object's class.
class(dat) <- c("tbl_df", "tbl", "data.frame")


dat

它看起来像这样:

# A tibble: 24 x 4
   time.course log_delta `UMAP cluster` cell_name    
   <chr>           <dbl> <fct>          <fct>        
 1 CONTROL        0      1              Macrophage   
 2 DAY03          0.620  1              Macrophage   
 3 DAY06          0.973  1              Macrophage   
 4 DAY09          0.789  1              Macrophage   
 5 DAY12          0.413  1              Macrophage   
 6 DAY15          0.402  1              Macrophage   
 7 CONTROL        0      5              Fibroblast   
 8 DAY03         -0.169  5              Fibroblast   
 9 DAY06         -0.973  5              Fibroblast   
10 DAY09         -1.46   5              Fibroblast   
11 DAY12         -1.35   5              Fibroblast   
12 DAY15         -1.78   5              Fibroblast   
13 CONTROL        0      7              Myofibroblast
14 DAY03         -0.187  7              Myofibroblast
15 DAY06         -0.254  7              Myofibroblast
16 DAY09         -0.335  7              Myofibroblast
17 DAY12         -0.655  7              Myofibroblast
18 DAY15         -1.17   7              Myofibroblast
19 CONTROL        0      13             Myofibroblast
20 DAY03         -0.0161 13             Myofibroblast
21 DAY06         -0.629  13             Myofibroblast
22 DAY09         -1.43   13             Myofibroblast
23 DAY12         -1.47   13             Myofibroblast
24 DAY15         -2.58   13             Myofibroblast

请注意,Myofibroblast(肌成纤维细胞)出现了两次,分别对应 UMAP cluster 7 和 13。

我尝试将此代码与directlabels 包一起使用:

# One line per UMAP cluster, coloured by cell type; directlabels places the
# cell-type name next to the last point of the lines.
ggplot(dat, aes(x = time.course, y = log_delta, color = cell_name)) +
  geom_line(aes(group = `UMAP cluster`)) +
  geom_dl(
    aes(label = cell_name),
    # Shift labels slightly right of the last point and bump overlaps apart.
    method = list(dl.trans(x = x + 0.3), "last.bumpup", cex = 0.8)
  ) +
  # Extra horizontal padding so the labels fit inside the panel.
  scale_x_discrete(expand = c(0, 2.5)) +
  labs(x = "", y = "log(proportion / control proportion)") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 60, hjust = 1),
    legend.title = element_blank()
  )

绘制出的图如下所示:

在此处输入图像描述

请注意,Myofibroblast 的标签并没有分别出现在两条线(蓝色)的末尾。我想做的是:

  1. 用两种不同的颜色分别绘制两条 Myofibroblast 线
  2. 让每条 Myofibroblast 线都带有各自的标签。

我怎样才能做到这一点?

4

1 回答 1

0

直接使用 geom_dl 无法做到这一点,因为它继承了 aes,并直接把该列变量用作标签。我能想到两种解决方案。第一种是把 cluster ID 与细胞类型拼接起来,创建一个新变量:

# The original column name contains a space, which is awkward to reference.
# Rename it by name (not by position) so a change in column order cannot
# rename the wrong column; re-running this line is harmless.
names(dat)[names(dat) == "UMAP cluster"] <- "UMAPcluster"

# Build one identifier per line by pasting cell type and cluster id together.
# Fixing the factor levels to order of first appearance keeps the lines in
# data order instead of alphabetical order.
dat <- dat %>%
  mutate(
    new = paste(cell_name, UMAPcluster),
    new = factor(new, levels = unique(new))
  )

# Group, colour and label by the combined identifier so that each cluster
# gets its own colour and its own direct label.
ggplot(dat, aes(x = time.course, y = log_delta, group = new, color = new)) +
  geom_line() +
  scale_x_discrete(expand = c(0, 2.5)) +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  xlab("") +
  ylab("log(proportion / control proportion)") +
  theme(legend.title = element_blank()) +
  geom_dl(
    aes(label = new),
    method = list(dl.trans(x = x + 0.3), "last.bumpup", cex = 0.8)
  )

在此处输入图像描述

第二种方案是另外创建一个只包含标签位置的数据框,然后用 geom_text(如果想要带边框的标签,可以用 geom_label)进行标注。此时最好保留 cluster 的图例,以便清楚各颜色的含义。

# One row per cluster at its last time point; these rows position the labels.
# time.course is character, so max() returns the alphabetically last value,
# which is also the right-most position on the discrete x axis.
# ungroup() drops the grouping so LAB is a plain data frame for later use.
LAB <- dat %>%
  group_by(UMAPcluster) %>%
  filter(time.course == max(time.course)) %>%
  ungroup()

# Colour by cluster (legend kept so the colours stay interpretable) and
# annotate the end of each line with its cell type.
ggplot(dat, aes(x = time.course, y = log_delta, color = UMAPcluster)) +
  geom_line(aes(group = UMAPcluster)) +
  scale_x_discrete(expand = c(0, 2.5)) +
  theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  xlab("") +
  ylab("log(proportion / control proportion)") +
  # nudge_x moves the labels to the right of the last point.
  geom_label(
    data = LAB,
    aes(label = cell_name),
    show.legend = FALSE,
    nudge_x = 0.7
  )

在此处输入图像描述

于 2019-10-30T09:48:28.593 回答