几天来,我一直在尝试对 ggalluvial 图中的层(strata)和流(flows)进行排序。我想可视化患者在不同筛查程序(X1、X2、X3、X4)之间的流动,并根据最终诊断(X4 中的值)为各条流着色。
您能帮我对示例 A 和 B 的第一列中的组内的值进行排序吗?我希望每个组中的所有红色、黄色和蓝色值都堆叠在一起。
到目前为止,我已经尝试过宽格式数据,以及 aes.flow = "backward" 和 "forward"、lode.guidance 和 lode.ordering 的各种组合......
如果这在 ggalluvial 中是不可能的,但在其他包中是可能的,我也想知道。
提前致谢。
宽格式数据:
# Simulate a wide-format screening data set: one row per patient (ID) and one
# column per screening step (X1 -> X2 -> X3 -> X4), where each step is drawn
# conditionally on the previous one.
# NOTE(review): tibble(), %>%, mutate(), case_when() and n() are used below but
# no library() calls are visible in this excerpt -- presumably tibble/dplyr are
# attached earlier; confirm.
# Fix the RNG seed so the simulated data are reproducible.
set.seed(1)
data <- tibble(
ID = 1:879,
# X1: first screening step, drawn for all 879 patients with weights 0.1/0.8/0.1.
X1 = sample(c("only_parent", "parent_and_3D", "only_3D"), size = 879, replace = TRUE, prob = c(0.1, 0.8, 0.1))) %>%
mutate(
# X2: second step, conditional on X1. Each sample() call draws n() values (one
# per row); case_when() keeps a draw only on the rows whose condition matches.
X2 = case_when(
X1 == "only_parent" ~ sample(c("only_I", "not_identified"), size = n(), prob = c(0.1, 0.9), replace = TRUE),
X1 == "parent_and_3D" ~ sample(c("only_I", "both_I_and_II", "only_II", "not_identified"), size = n(), prob = c(0.05, 0.05, 0.2, 0.7), replace = TRUE),
X1 == "only_3D"~ sample(c("only_II", "not_identified"), size = n(), prob = c(0.1, 0.9), replace = TRUE),
# Fallback for any X1 value not listed above.
TRUE ~ NA_character_)) %>%
mutate(
# X3: third step, conditional on X2. "only_I" and "not_identified" map to a
# fixed value; the other two categories are sampled.
X3 = case_when(
X2 == "only_I" ~ "PO_only",
X2 == "both_I_and_II" ~ sample(c("PO_and_EHL", "PO_and_F/T", "PO_and_F/T_and_EHL"), size = n(), prob = c(0.3, 0.5, 0.2), replace = TRUE),
# NOTE(review): these weights sum to 1.1; sample() treats `prob` as weights
# that need not sum to one, so the effective probabilities differ from the
# numbers written here -- confirm the intended values.
X2 == "only_II"~ sample(c("F/T", "F/T_and_EHL", "EHL"), size = n(), prob = c(0.1, 0.6, 0.4), replace = TRUE),
X2 == "not_identified" ~ "not_identified",
TRUE ~ NA_character_)) %>%
mutate(
# X4: final diagnosis, conditional on X3. Every identified X3 category samples
# from the same three outcomes with category-specific weights;
# "not_identified" is carried through unchanged.
X4 = case_when(
X3 == "PO_only" ~ sample(c("Two_primary_ind", "One_primary_ind", "No TW"), size = n(), prob = c(0.02, 0.1, 0.88), replace = TRUE),
X3 == "PO_and_EHL" ~ sample(c("Two_primary_ind", "One_primary_ind", "No TW"), size = n(), prob = c(0.05, 0.2, 0.75), replace = TRUE),
X3 == "PO_and_F/T" ~ sample(c("Two_primary_ind", "One_primary_ind", "No TW"), size = n(), prob = c(0.05, 0.2, 0.75), replace = TRUE),
X3 == "PO_and_F/T_and_EHL" ~ sample(c("Two_primary_ind", "One_primary_ind", "No TW"), size = n(), prob = c(0.05, 0.2, 0.75), replace = TRUE),
X3 == "F/T" ~ sample(c("Two_primary_ind", "One_primary_ind", "No TW"), size = n(), prob = c(0.02, 0.1, 0.88), replace = TRUE),
X3 == "F/T_and_EHL" ~ sample(c("Two_primary_ind", "One_primary_ind", "No TW"), size = n(), prob = c(0.05, 0.2, 0.75), replace = TRUE),
# NOTE(review): these weights also sum to 1.1 (see the note on X3) -- confirm.
X3 == "EHL" ~ sample(c("Two_primary_ind", "One_primary_ind", "No TW"), size = n(), prob = c(0.02, 0.2, 0.88), replace = TRUE),
X3 == "not_identified" ~ "not_identified",
TRUE ~ NA_character_ ))
# Print the first rows of the simulated data.
head(data)
# A tibble: 6 x 5
ID X1 X2 X3 X4
<int> <chr> <chr> <chr> <chr>
1 1 parent_and_3D not_identified not_identified not_identified
2 2 parent_and_3D only_II F/T_and_EHL No TW
3 3 parent_and_3D not_identified not_identified not_identified
4 4 only_parent only_I PO_only No TW
5 5 parent_and_3D only_II F/T_and_EHL No TW
6 6 only_3D not_identified not_identified not_identified
示例 A
值未在第一列的底部框中排序。
# Build the long-format lode table for example A.
#
# Steps:
#   1. Count patients for every observed X1/X2/X3/X4 combination (column `n`).
#   2. Derive `fill_stat` from the final diagnosis X4 as a factor with an
#      explicit level order, used later as the fill variable.
#   3. Sort by `fill_stat` and number the rows; each row (one path through the
#      four axes) becomes one alluvium identified by `subject`.
#   4. Reshape to one row per (subject, axis): `key` holds the axis name and
#      `value` the category on that axis.
data_long_a <- data %>%
  # count() groups, tallies and returns an ungrouped tibble in one step,
  # replacing the group_by() / count() / ungroup() sequence.
  count(X1, X2, X3, X4) %>%
  mutate(
    fill_stat = factor(
      X4,
      levels = c(
        "not_identified", "No TW", "One_primary_ind", "Two_primary_ind"
      )
    )
  ) %>%
  arrange(fill_stat) %>%
  # row_number() also works on zero-row input, where seq(1, n()) would yield
  # c(1, 0) and make mutate() fail.
  mutate(subject = row_number()) %>%
  # pivot_longer() supersedes gather(); the row order it produces differs, but
  # the stable arrange() below restores the same final order.
  pivot_longer(
    cols = c(X1, X2, X3, X4),
    names_to = "key",
    values_to = "value"
  ) %>%
  mutate(key = factor(key, levels = c("X1", "X2", "X3", "X4"))) %>%
  # Order rows by axis, then by diagnosis.
  arrange(key, fill_stat)
# Example A: alluvial plot restricted to the first two axes (X1 and X2).
# Flows are filled by `fill_stat`; strata are drawn as boxes and labelled with
# their category. The manual fill colours are given unnamed, so they are
# matched by position to the `fill_stat` levels shown on the scale.
# NOTE(review): ggalluvial's stat_flow() documents an `aes.bind` parameter that
# influences how lodes are ordered within a stratum with respect to aesthetics;
# it may be relevant to the ordering problem described here -- confirm.
ggplot(
  data = filter(data_long_a, key %in% c("X1", "X2")),
  mapping = aes(
    x = key,
    y = n,
    stratum = value,
    alluvium = subject,
    label = value
  )
) +
  geom_flow(mapping = aes(fill = fill_stat)) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  scale_fill_manual(values = c("#BAB3B3EB", "red", "yellow", "blue")) +
  theme_void()
示例 B
第一列中的流线未排序。
# Build the long-format lode table for example B.
#
# Same pipeline as example A, but restricted to patients with an identified
# final diagnosis and to the axes X2, X3 and X4:
#   1. Drop rows whose X4 is "not_identified".
#   2. Count patients per observed X2/X3/X4 combination (column `n`).
#   3. Derive `fill_stat` from X4, sort by it and number the rows (`subject`).
#   4. Reshape to one row per (subject, axis) with columns `key` and `value`.
data_long_b <- data %>%
  filter(X4 != "not_identified") %>%
  # count() keeps only the listed columns, so the earlier select(-X1) and the
  # group_by() / ungroup() pair are not needed.
  count(X2, X3, X4) %>%
  mutate(
    # "not_identified" stays in the level set exactly as before, even though
    # no remaining row carries it.
    fill_stat = factor(
      X4,
      levels = c(
        "not_identified", "No TW", "One_primary_ind", "Two_primary_ind"
      )
    )
  ) %>%
  arrange(fill_stat) %>%
  # row_number() also works on zero-row input, where seq(1, n()) would yield
  # c(1, 0) and make mutate() fail.
  mutate(subject = row_number()) %>%
  # pivot_longer() supersedes gather(); the row order it produces differs, but
  # the stable arrange() below restores the same final order.
  pivot_longer(
    cols = c(X2, X3, X4),
    names_to = "key",
    values_to = "value"
  ) %>%
  mutate(key = factor(key, levels = c("X2", "X3", "X4"))) %>%
  # Order rows by axis, then by diagnosis.
  arrange(key, fill_stat)
# Example B: alluvial plot over all axes of `data_long_b` (X2, X3, X4).
# Flows are filled by `fill_stat` with `aes.flow = "backward"`; strata are
# drawn as boxes and labelled with their category. Three unnamed fill colours
# are supplied and matched by position to the levels shown on the scale.
# NOTE(review): ggalluvial's stat_flow() documents an `aes.bind` parameter that
# influences how lodes are ordered within a stratum with respect to aesthetics;
# it may be relevant to the ordering problem described here -- confirm.
ggplot(
  data = data_long_b,
  mapping = aes(
    x = key,
    y = n,
    stratum = value,
    alluvium = subject,
    label = value
  )
) +
  geom_flow(
    mapping = aes(fill = fill_stat),
    aes.flow = "backward"
  ) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  scale_fill_manual(values = c("red", "yellow", "blue")) +
  theme_void()