0

我有一个数据集,其中记录了 40 个特定基因中突变的存在/不存在,比较了 20 种组织类型的正常组织(例如肺组织)与来自该组织的肿瘤(例如肺肿瘤)。我正在努力寻找可视化这些数据的最佳方法。

数据的子集:

Gene    Lung_Normal Lung_Cancer Skin_Normal Skin_Cancer Brain_Normal    Brain_Cancer
Gene_1  TRUE    TRUE    TRUE    TRUE    TRUE    TRUE
Gene_2  TRUE    TRUE    TRUE    TRUE    TRUE    TRUE
Gene_3  FALSE   TRUE    FALSE   FALSE   FALSE   FALSE
Gene_4  FALSE   FALSE   FALSE   FALSE   FALSE   FALSE
Gene_5  FALSE   TRUE    FALSE   FALSE   FALSE   TRUE
Gene_6  FALSE   FALSE   TRUE    TRUE    TRUE    TRUE
Gene_7  FALSE   FALSE   FALSE   TRUE    FALSE   FALSE
Gene_8  FALSE   FALSE   FALSE   TRUE    FALSE   TRUE
Gene_9  FALSE   TRUE    FALSE   FALSE   FALSE   FALSE
Gene_10 FALSE   FALSE   FALSE   TRUE    FALSE   TRUE

我们想要传达的关键信息是,虽然相同的 3-4 个基因在正常组织中经常发生突变,但每个肿瘤都有更多的额外基因发生突变,并且肿瘤中的多样性也更多。我可以将其保留为这样的表格,但我很想找到一种以清晰方式可视化信息的好方法。

我想尝试制作一个类似 Circos 图(circos plot)的图形:一个圆由两个环组成,代表全部数据。内环是正常组织,外环是癌组织;每个扇区对应一种组织类型,内环上是该组织的正常样本,外环上是对应的肿瘤样本。每个基因用一种颜色编码,并且仅在发生突变时显示。因此,对于所有正常组织,只有 2-3 个基因突变的扇区会显示 2-3 种颜色,而外环的癌症扇区会显示更多的颜色,代表更多的突变。

但是我还没有找到可以创建这种可视化的绘图软件。有谁知道制作这种可视化的方法?即使只是给我指出一个 R 包也会很有帮助。我研究过 Circos 图和雷达图,但还没有找到能够制作我设想的这种可视化(只显示每种情况下实际发生的突变)的包。

如果有人认为不同类型的可视化可以代表这些数据,请告诉我,我很乐意考虑能够清楚地代表数据的替代方案。

先感谢您。

4

3 回答 3

5

不确定这是否是你要找的,但我试了一下。另外,从上面的描述中,我不能完全确定您想如何处理不同类型的细胞(肺、皮肤、脑)。如果这不是您想要的,也许您可以发布一张展示预期输出的示意图。

下图中,内圈是正常细胞,外圈是癌细胞。我在这里的回答受益于这篇文章

## Example data: one row per gene, one logical column per tissue/status pair
## (TRUE means the gene is mutated in that sample type).
tib <- tibble::tribble(
  ~Gene,     ~Lung_Normal, ~Lung_Cancer, ~Skin_Normal, ~Skin_Cancer, ~Brain_Normal, ~Brain_Cancer,
  "Gene_1",  TRUE,         TRUE,         TRUE,         TRUE,         TRUE,          TRUE,
  "Gene_2",  TRUE,         TRUE,         TRUE,         TRUE,         TRUE,          TRUE,
  "Gene_3",  FALSE,        TRUE,         FALSE,        FALSE,        FALSE,         FALSE,
  "Gene_4",  FALSE,        FALSE,        FALSE,        FALSE,        FALSE,         FALSE,
  "Gene_5",  FALSE,        TRUE,         FALSE,        FALSE,        FALSE,         TRUE,
  "Gene_6",  FALSE,        FALSE,        TRUE,         TRUE,         TRUE,          TRUE,
  "Gene_7",  FALSE,        FALSE,        FALSE,        TRUE,         FALSE,         FALSE,
  "Gene_8",  FALSE,        FALSE,        FALSE,        TRUE,         FALSE,         TRUE,
  "Gene_9",  FALSE,        TRUE,         FALSE,        FALSE,        FALSE,         FALSE,
  "Gene_10", FALSE,        FALSE,        FALSE,        TRUE,         FALSE,         TRUE
)

library(tidyr)
library(dplyr)

## Reshape to long format and count, per gene and diagnosis, how many tissue
## types carry the mutation. local() keeps the helper vector out of the
## global environment.
tib <- local({
  # Legend order used for the fill factor; "Unmutated" always comes last.
  fill_levels <- c(
    paste0("Gene_", c(1, 2, 6, 3, 5, 7, 8, 9, 10, 4)),
    "Unmutated"
  )

  tib %>%
    # "Lung_Normal" -> type = "Lung", with Normal / Cancer as value columns
    pivot_longer(
      cols = -Gene,
      names_to = c("type", ".value"),
      names_pattern = "(.*)_(.*)"
    ) %>%
    # stack Normal and Cancer into one logical column `val`
    pivot_longer(
      cols = c(Normal, Cancer),
      names_to = "diag",
      values_to = "val"
    ) %>%
    # fill label: the gene name when mutated, otherwise "Unmutated"
    mutate(f = case_when(val ~ Gene, TRUE ~ "Unmutated")) %>%
    group_by(Gene, f, diag) %>%
    summarise(s = n()) %>%
    mutate(
      diag = factor(diag, levels = c("Normal", "Cancer")),
      f = factor(f, levels = fill_levels)
    )
})

library(ggplot2)
library(RColorBrewer)
# Two concentric rings (inner = Normal, outer = Cancer): stacked bars drawn
# in polar coordinates, with the counts `s` mapped to the angle.
ggplot(data = tib, mapping = aes(x = diag, y = s, fill = f)) +
  geom_col() +
  coord_polar(theta = "y") +
  theme_void() +
  # nine gene colours followed by grey for the "Unmutated" slices
  scale_fill_manual(values = c(brewer.pal(9, "Paired"), "gray75")) +
  labs(fill = "Mutations")

在此处输入图像描述


编辑

下面是使用 Allan 建议的示例数据绘制的效果。这种方法的扩展性不好,因为所需的颜色太多,会降低图形的可读性。

# Example data literal (looks like dput() output): a 40-row data.frame with a
# `genes` column (Gene1..Gene40) and one logical column per tissue/status
# pair, named <tissue>_cancer / <tissue>_normal, for bone, brain, breast,
# colon, kidney, liver, lung, prostate, skin and thyroid.
# TRUE presumably marks a gene as mutated in that sample type.
df <- structure(list(genes = c("Gene1", "Gene2", "Gene3", "Gene4", 
"Gene5", "Gene6", "Gene7", "Gene8", "Gene9", "Gene10", "Gene11", 
"Gene12", "Gene13", "Gene14", "Gene15", "Gene16", "Gene17", "Gene18", 
"Gene19", "Gene20", "Gene21", "Gene22", "Gene23", "Gene24", "Gene25", 
"Gene26", "Gene27", "Gene28", "Gene29", "Gene30", "Gene31", "Gene32", 
"Gene33", "Gene34", "Gene35", "Gene36", "Gene37", "Gene38", "Gene39", 
"Gene40"), bone_cancer = c(FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, 
TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE), bone_normal = c(FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
TRUE, FALSE, TRUE), brain_cancer = c(TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE), brain_normal = c(FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, 
FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE), breast_cancer = c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), breast_normal = c(TRUE, 
FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, 
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 
FALSE), colon_cancer = c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE), colon_normal = c(FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
TRUE, TRUE, FALSE), kidney_cancer = c(FALSE, FALSE, FALSE, FALSE, 
                              FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
                              TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, 
                              FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
                              FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE), 
kidney_normal = c(FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, 
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, 
TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE), liver_cancer = c(FALSE, 
FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE), liver_normal = c(TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, 
TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, 
TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
FALSE), lung_cancer = c(TRUE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), 
lung_normal = c(FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), prostate_cancer = c(TRUE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, 
FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
TRUE, FALSE, TRUE), prostate_normal = c(TRUE, FALSE, FALSE, 
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE), skin_cancer = c(FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE), skin_normal = c(TRUE, 
FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, 
FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, 
FALSE, FALSE, FALSE), thyroid_cancer = c(FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, 
FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE), thyroid_normal = c(FALSE, FALSE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE)), 
class = "data.frame", row.names = c(NA, 40L))
# Rename the first column (`genes`) so the pipeline below can refer to `Gene`.
names(df)[1] <- "Gene"

# Long format with one row per gene / fill label / diagnosis, where `s` is the
# number of tissue types falling into that combination.
tib <- df %>%
  # "bone_cancer" -> type = "bone", with normal / cancer as value columns
  pivot_longer(
    cols = -Gene,
    names_to = c("type", ".value"),
    names_pattern = "(.*)_(.*)"
  ) %>%
  # stack normal and cancer into one logical column `val`
  pivot_longer(
    cols = c(normal, cancer),
    names_to = "diag",
    values_to = "val"
  ) %>%
  # fill label: the gene name when mutated, otherwise "Unmutated"
  mutate(f = case_when(val ~ Gene, TRUE ~ "Unmutated")) %>%
  group_by(Gene, f, diag) %>%
  summarise(s = n()) %>%
  # group_by() replaces the groups left over from summarise(), so the result
  # is grouped by Gene only
  group_by(Gene) %>%
  mutate(diag = factor(diag, levels = c("normal", "cancer")))
         

# Gene names ordered from the most to the least frequently mutated.
# Assumes `tib` is still grouped by Gene, so summarise() yields one row per
# gene holding the share of its counts that are not "Unmutated".
levs <- tib %>%
  summarise(pct_mutated = sum(s[f != "Unmutated"]) / sum(s)) %>%
  arrange(desc(pct_mutated)) %>%
  pull(Gene)

# Use that ranking as the fill order, keeping "Unmutated" as the last level.
tib <- mutate(tib, f = factor(f, levels = c(levs, "Unmutated")))



library(ggplot2)
library(RColorBrewer)
# Two concentric rings (inner = normal, outer = cancer): stacked bars drawn
# in polar coordinates, with the counts `s` mapped to the angle.
ggplot(data = tib, mapping = aes(x = diag, y = s, fill = f)) +
  geom_col() +
  coord_polar(theta = "y") +
  theme_void() +
  # one rainbow colour per gene level, then grey for "Unmutated"
  scale_fill_manual(values = c(rainbow(nlevels(tib$f) - 1), "gray75")) +
  labs(fill = "Mutations")

在此处输入图像描述

于 2020-09-28T14:06:56.343 回答
3

另一种选择是热图。您可以通过区分癌症与正常情况来做到这一点,或者调整填充以仅反映癌症中的突变、仅反映正常组织中的突变、两者兼而有之或两者都不反映。

对于这两种情况,首先有必要重塑您的数据:

选项1

library(tidyr)
library(dplyr)
library(ggplot2)

# Heat map with one panel per state (cancer / normal): tissues on the x axis,
# genes on the y axis, tile fill showing the TRUE/FALSE mutation flag.
df %>%
  # every column except the first becomes a name/value pair
  pivot_longer(cols = -1) %>%
  # "bone_cancer" -> tissue = "bone", state = "cancer"
  separate(col = name, into = c("tissue", "state"), sep = "_") %>%
  # keep genes in numeric order (Gene1, Gene2, ...) rather than alphabetical
  mutate(genes = factor(genes, levels = paste0("Gene", 1:40))) %>%
  ggplot(mapping = aes(x = tissue, y = genes, fill = value)) +
  geom_tile(color = "black") +
  facet_grid(cols = vars(state)) +
  # stagger the tissue labels over two rows so they do not overlap
  scale_x_discrete(guide = guide_axis(n.dodge = 2))

在此处输入图像描述

选项 2

# Single heat map: tissues on the x axis, genes on the y axis, tile fill
# showing whether the gene is mutated in neither sample, the cancer sample
# only, the healthy sample only, or both.
#
# The category is derived explicitly from the `cancer` and `normal` columns
# rather than from an arithmetic code. The earlier code,
# `2 + diff(value) + 2 * all(value)`, depended on the cancer row preceding
# the normal row, which made "cancer only" the lowest code, so the legend
# labels "Neither" and "Cancer only" ended up swapped.
df %>%
  pivot_longer(cols = -1) %>%
  # "bone_cancer" -> tissue = "bone", state = "cancer"
  separate(col = name, into = c("tissue", "state"), sep = "_") %>%
  # keep genes in numeric order (Gene1, Gene2, ...) rather than alphabetical
  mutate(genes = factor(genes, levels = paste0("Gene", 1:40))) %>%
  # one row per gene x tissue with logical columns `cancer` and `normal`
  pivot_wider(names_from = state, values_from = value) %>%
  mutate(
    mutations = factor(
      case_when(
        cancer & normal ~ "Both",
        cancer ~ "Cancer only",
        normal ~ "Healthy only",
        TRUE ~ "Neither"
      ),
      levels = c("Neither", "Cancer only", "Healthy only", "Both")
    )
  ) %>%
  ggplot(aes(tissue, genes, fill = mutations)) +
  geom_tile(color = "black") +
  # stagger the tissue labels over two rows so they do not overlap
  scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
  # drop = FALSE keeps all four legend entries even if a category is absent
  scale_fill_discrete(drop = FALSE)

在此处输入图像描述

对于这些,我使用了应该近似于您的数据结构的示例数据:

# Example data literal (looks like dput() output): a 40-row data.frame with a
# `genes` column (Gene1..Gene40) and one logical column per tissue/status
# pair, named <tissue>_cancer / <tissue>_normal, for bone, brain, breast,
# colon, kidney, liver, lung, prostate, skin and thyroid.
# TRUE presumably marks a gene as mutated in that sample type.
df <- structure(list(genes = c("Gene1", "Gene2", "Gene3", "Gene4", 
"Gene5", "Gene6", "Gene7", "Gene8", "Gene9", "Gene10", "Gene11", 
"Gene12", "Gene13", "Gene14", "Gene15", "Gene16", "Gene17", "Gene18", 
"Gene19", "Gene20", "Gene21", "Gene22", "Gene23", "Gene24", "Gene25", 
"Gene26", "Gene27", "Gene28", "Gene29", "Gene30", "Gene31", "Gene32", 
"Gene33", "Gene34", "Gene35", "Gene36", "Gene37", "Gene38", "Gene39", 
"Gene40"), bone_cancer = c(FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, 
TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE), bone_normal = c(FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
TRUE, FALSE, TRUE), brain_cancer = c(TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE), brain_normal = c(FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, 
FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE), breast_cancer = c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), breast_normal = c(TRUE, 
FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, 
FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 
FALSE), colon_cancer = c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE), colon_normal = c(FALSE, 
TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
TRUE, TRUE, FALSE), kidney_cancer = c(FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE), 
    kidney_normal = c(FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
    FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, 
    TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, 
    TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, 
    TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE), liver_cancer = c(FALSE, 
    FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, 
    FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, 
    FALSE, FALSE, FALSE), liver_normal = c(TRUE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, 
    TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, 
    TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
    FALSE), lung_cancer = c(TRUE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), 
    lung_normal = c(FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, 
    TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), prostate_cancer = c(TRUE, 
    FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, FALSE, TRUE, 
    FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    TRUE, FALSE, TRUE), prostate_normal = c(TRUE, FALSE, FALSE, 
    FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
    FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE), skin_cancer = c(FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, 
    TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE), skin_normal = c(TRUE, 
    FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, 
    FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, 
    TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, 
    TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, 
    FALSE, FALSE, FALSE), thyroid_cancer = c(FALSE, FALSE, FALSE, 
    FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, 
    FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, 
    FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, 
    FALSE), thyroid_normal = c(FALSE, FALSE, TRUE, FALSE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
    FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, 
    FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE)), class = "data.frame", row.names = c(NA, 
40L))
于 2020-09-28T14:38:24.313 回答
2

这是另一种方案。它类似于热图,但使用彩色的点。我还选择隐藏未突变的情况,因为我认为我们更关心的是可视化突变发生在哪里。如果这些 FALSE 值也被着色,就会给大脑增加额外需要解读的信息,从而加重认知负荷。

# Example data: mutation flags (TRUE = mutated) for ten genes in normal and
# cancer samples of lung, skin and brain tissue, stored with tibble classes.
gene_data <- structure(
  list(
    Gene = paste0("Gene_", 1:10),
    Lung_Normal = c(TRUE, TRUE, FALSE, FALSE, FALSE,
                    FALSE, FALSE, FALSE, FALSE, FALSE),
    Lung_Cancer = c(TRUE, TRUE, TRUE, FALSE, TRUE,
                    FALSE, FALSE, FALSE, TRUE, FALSE),
    Skin_Normal = c(TRUE, TRUE, FALSE, FALSE, FALSE,
                    TRUE, FALSE, FALSE, FALSE, FALSE),
    Skin_Cancer = c(TRUE, TRUE, FALSE, FALSE, FALSE,
                    TRUE, TRUE, TRUE, FALSE, TRUE),
    Brain_Normal = c(TRUE, TRUE, FALSE, FALSE, FALSE,
                     TRUE, FALSE, FALSE, FALSE, FALSE),
    Brain_Cancer = c(TRUE, TRUE, FALSE, FALSE, TRUE,
                     TRUE, FALSE, TRUE, FALSE, TRUE)
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

稍微处理一下数据:

# Long-format table of the mutations that are present: one row per
# gene / tissue type / status combination where the gene is mutated.
mutation_data <- gene_data %>%
  # one row per gene x original column; the column name goes into `name`
  pivot_longer(-Gene, values_to = "Mutation") %>%
  # "Lung_Normal" -> Type = "Lung" (tissue), Status = "Normal"
  separate(name, into = c("Type", "Status")) %>%
  mutate(
    Mutation = as.numeric(Mutation), # TRUE/FALSE -> 1/0 for the filter below
    # The Cancer/Normal levels belong to Status. Applying them to Type, which
    # holds Lung/Skin/Brain, would turn every tissue name into NA and
    # collapse the x axis of the plot.
    Status = factor(Status, levels = c("Cancer", "Normal")),
    # keep genes in numeric order (Gene_1 ... Gene_10)
    Gene = factor(Gene, levels = paste0("Gene_", 1:10))
  ) %>%
  # drop the unmutated combinations for a cleaner plot
  filter(Mutation == 1)

构建图形:

 # Dot plot of mutated genes per tissue type. The x / y / colour mapping is
 # declared once and inherited by both point layers.
 ggplot(mapping = aes(x = Type, y = Gene, color = Status)) +
    # first layer: every mutation seen in cancer samples (red)
    geom_point(data = filter(mutation_data, Status == "Cancer"), size = 4) +
    # second layer: mutations seen in normal samples (black), drawn on top of
    # the red points at the same positions
    geom_point(data = filter(mutation_data, Status == "Normal"), size = 4) +
    # colour scale and legend text
    scale_color_manual(
      name = "Mutation locations",
      values = c("Cancer" = "Red", "Normal" = "Black"),
      labels = c("Cancer cells only", "Cancer and normal cells")
    ) +
    # title, subtitle and x-axis label
    labs(
      title = "Gene mutations in brain, lung, and skin cells",
      subtitle = "Cancer cells vs. normal cells",
      x = "Cell type"
    ) +
    theme_bw()

在此处输入图像描述

于 2020-09-28T16:11:04.360 回答