> df_1
# A tibble: 47 x 3
# Groups: therapy_class [9]
therapy_class Year_month count
<ord> <yearmon> <int>
1 ALK Inhibitors Dec 2019 16
2 ALK Inhibitors Jan 2020 14
3 ALK Inhibitors Feb 2020 14
4 ALK Inhibitors Mar 2020 22
5 ALK Inhibitors Apr 2020 13
6 ALK Inhibitors May 2020 17
7 Anti-VEGF-based therapies Dec 2019 33
8 Anti-VEGF-based therapies Jan 2020 35
9 Anti-VEGF-based therapies Feb 2020 36
10 Anti-VEGF-based therapies Mar 2020 20
# … with 37 more rows
A tibble: 10 x 7
therapy_class `Dec 2019` `Jan 2020` `Feb 2020` `Mar 2020` `Apr 2020` `May 2020`
<ord> <chr> <chr> <chr> <chr> <chr> <chr>
1 ALK Inhibitors "16 <br>[2.7%]" "14 <br>[2.0%]" "14 <br>[2.2%]" "22 <br>[3.3%]" "13 <br>[2.1%]" "17 <br>[3.4%]"
2 Anti-VEGF-based therapies "33 <br>[5.6%]" "35 <br>[4.9%]" "36 <br>[5.7%]" "20 <br>[3.0%]" "21 <br>[3.4%]" "20 <br>[4.0%]"
3 EGFR TKIs "52 <br>[8.8%]" "57 <br>[8.0%]" "60 <br>[9.5%]" "52 <br>[7.8%]" "56 <br>[9.2%]" "49 <br>[9.8%]"
4 EGFR-antibody based therapies "" "" "" "" "" ""
5 Non-platinum-based chemotherapy… "1 <br>[0.2%]" "4 <br>[0.6%]" "4 <br>[0.6%]" "" "1 <br>[0.2%]" ""
6 IO-based therapies "308 <br>[52.0%]" "385 <br>[54.0%]" "330 <br>[52.3%]" "379 <br>[56.7%]" "345 <br>[56.4%]" "265 <br>[52.9%]"
7 Platinum-based chemotherapy com… "123 <br>[20.8%]" "147 <br>[20.6%]" "128 <br>[20.3%]" "134 <br>[20.1%]" "120 <br>[19.6%]" "107 <br>[21.4%]"
8 Single agent chemotherapies "29 <br>[4.9%]" "33 <br>[4.6%]" "17 <br>[2.7%]" "28 <br>[4.2%]" "25 <br>[4.1%]" "22 <br>[4.4%]"
9 Other "30 <br>[5.1%]" "38 <br>[5.3%]" "42 <br>[6.7%]" "33 <br>[4.9%]" "31 <br>[5.1%]" "21 <br>[4.2%]"
10 <strong>Total</strong> "<strong>592</str… "<strong>713</str… "<strong>631</st… "<strong>668</st… "<strong>612</st… "<strong>501</st…
> df_2
# A tibble: 46 x 3
# Groups: therapy_class [9]
therapy_class Year_month count
<ord> <yearmon> <int>
1 ALK Inhibitors Dec 2019 16
2 ALK Inhibitors Feb 2020 14
3 ALK Inhibitors Mar 2020 22
4 ALK Inhibitors Apr 2020 13
5 ALK Inhibitors May 2020 17
6 Anti-VEGF-based therapies Dec 2019 33
7 Anti-VEGF-based therapies Jan 2020 35
8 Anti-VEGF-based therapies Feb 2020 36
9 Anti-VEGF-based therapies Mar 2020 20
10 Anti-VEGF-based therapies Apr 2020 21
# … with 36 more rows
> t2
# A tibble: 10 x 7
therapy_class `Dec 2019` `Feb 2020` `Mar 2020` `Apr 2020` `May 2020` `Jan 2020`
<ord> <chr> <chr> <chr> <chr> <chr> <chr>
1 ALK Inhibitors "16 <br>[2.7%]" "14 <br>[2.2%]" "22 <br>[3.3%]" "13 <br>[2.1%]" "17 <br>[3.4%]" ""
2 Anti-VEGF-based therapies "33 <br>[5.6%]" "36 <br>[5.7%]" "20 <br>[3.0%]" "21 <br>[3.4%]" "20 <br>[4.0%]" "35 <br>[5.0%]"
3 EGFR TKIs "52 <br>[8.8%]" "60 <br>[9.5%]" "52 <br>[7.8%]" "56 <br>[9.2%]" "49 <br>[9.8%]" "57 <br>[8.2%]"
4 EGFR-antibody based therapies "" "" "" "" "" ""
5 Non-platinum-based chemotherapy… "1 <br>[0.2%]" "4 <br>[0.6%]" "" "1 <br>[0.2%]" "" "4 <br>[0.6%]"
6 IO-based therapies "308 <br>[52.0%]" "330 <br>[52.3%]" "379 <br>[56.7%]" "345 <br>[56.4%]" "265 <br>[52.9%]" "385 <br>[55.1%]"
7 Platinum-based chemotherapy com… "123 <br>[20.8%]" "128 <br>[20.3%]" "134 <br>[20.1%]" "120 <br>[19.6%]" "107 <br>[21.4%]" "147 <br>[21.0%]"
8 Single agent chemotherapies "29 <br>[4.9%]" "17 <br>[2.7%]" "28 <br>[4.2%]" "25 <br>[4.1%]" "22 <br>[4.4%]" "33 <br>[4.7%]"
9 Other "30 <br>[5.1%]" "42 <br>[6.7%]" "33 <br>[4.9%]" "31 <br>[5.1%]" "21 <br>[4.2%]" "38 <br>[5.4%]"
10 <strong>Total</strong> "<strong>592</str… "<strong>631</str… "<strong>668</st… "<strong>612</st… "<strong>501</st… "<strong>699</st…
>
我正在尝试从长表中创建一个包含计数和百分比的宽表。这些列是“月年”,需要按顺序排列。我的问题是,当第一组(ALK 抑制剂)的某些“月年”缺少行时,列的顺序就会被打乱。缺少的“月年”位于末尾。长桌也不是固定桌。它是从用户可以选择月份年份范围的函数生成的。所以 Year_month 列可以有任何范围。
在此示例中,我使用了 2019 年 12 月至 2020 年 5 月的 6 个月范围。"df_1" 全部为 6 个月,因此生成的宽表符合预期。“df_2”缺少 2020 年 1 月的 ALK 抑制剂。所以结果表的末尾有“2020 年 1 月”。
这是我使用生成宽表的代码:
df_2 %>%
pivot_wider(names_from = Year_month, values_from = count) %>%
ungroup() %>%
mutate_at(.vars = vars(contains("20")), list(
~ ifelse(is.na(.), "", paste(., sprintf("<br>[%1.1f%%]", 100 * (. / sum(., na.rm = TRUE)))))
))
这是样本数据df_2
structure(list(therapy_class = structure(c(1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("ALK Inhibitors",
"Anti-VEGF-based therapies", "EGFR TKIs", "EGFR-antibody based therapies",
"Non-platinum-based chemotherapy combinations", "IO-based therapies",
"Platinum-based chemotherapy combinations", "Single agent chemotherapies",
"Other"), class = c("ordered", "factor")), Year_month = structure(c(2019.91666666667,
2020.08333333333, 2020.16666666667, 2020.25, 2020.33333333333,
2019.91666666667, 2020, 2020.08333333333, 2020.16666666667, 2020.25,
2020.33333333333, 2019.91666666667, 2020, 2020.08333333333, 2020.16666666667,
2020.25, 2020.33333333333, NA, 2019.91666666667, 2020, 2020.08333333333,
2020.25, 2019.91666666667, 2020, 2020.08333333333, 2020.16666666667,
2020.25, 2020.33333333333, 2019.91666666667, 2020, 2020.08333333333,
2020.16666666667, 2020.25, 2020.33333333333, 2019.91666666667,
2020, 2020.08333333333, 2020.16666666667, 2020.25, 2020.33333333333,
2019.91666666667, 2020, 2020.08333333333, 2020.16666666667, 2020.25,
2020.33333333333), class = "yearmon"), count = c(16L, 14L, 22L,
13L, 17L, 33L, 35L, 36L, 20L, 21L, 20L, 52L, 57L, 60L, 52L, 56L,
49L, NA, 1L, 4L, 4L, 1L, 308L, 385L, 330L, 379L, 345L, 265L,
123L, 147L, 128L, 134L, 120L, 107L, 29L, 33L, 17L, 28L, 25L,
22L, 30L, 38L, 42L, 33L, 31L, 21L)), row.names = c(NA, -46L), groups = structure(list(
therapy_class = structure(1:9, .Label = c("ALK Inhibitors",
"Anti-VEGF-based therapies", "EGFR TKIs", "EGFR-antibody based therapies",
"Non-platinum-based chemotherapy combinations", "IO-based therapies",
"Platinum-based chemotherapy combinations", "Single agent chemotherapies",
"Other"), class = c("ordered", "factor")), .rows = structure(list(
1:5, 6:11, 12:17, 18L, 19:22, 23:28, 29:34, 35:40, 41:46), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
这是包含月季度数据的新数据集
df <- structure(list(therapy_class = structure(c(1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L), .Label = c("ALK Inhibitors",
"Anti-VEGF-based therapies", "EGFR TKIs", "EGFR-antibody based therapies",
"Non-platinum-based chemotherapy combinations", "IO-based therapies",
"Platinum-based chemotherapy combinations", "Single agent chemotherapies",
"Other"), class = c("ordered", "factor")), quarter = structure(c(2020.75,
2021, 2021.25, 2020.5, 2020.75, 2021.25, 2020.5, 2020.75, 2021,
2021.25, NA, NA, 2020.5, 2020.75, 2021, 2021.25, 2021.5, 2020.5,
2020.75, 2021, 2021.25, 2021.5, 2020.5, 2020.75, 2021, 2021.25,
2021.5, 2020.5, 2020.75, 2021, 2021.25, 2021.5), class = "yearqtr"),
count = c(4L, 2L, 2L, 3L, 2L, 3L, 1L, 1L, 1L, 1L, NA, NA,
252L, 184L, 190L, 180L, 120L, 59L, 46L, 63L, 47L, 25L, 4L,
4L, 1L, 2L, 1L, 16L, 12L, 21L, 21L, 11L)), row.names = c(NA,
-32L), groups = structure(list(therapy_class = structure(1:9, .Label = c("ALK Inhibitors",
"Anti-VEGF-based therapies", "EGFR TKIs", "EGFR-antibody based therapies",
"Non-platinum-based chemotherapy combinations", "IO-based therapies",
"Platinum-based chemotherapy combinations", "Single agent chemotherapies",
"Other"), class = c("ordered", "factor")), .rows = structure(list(
1:3, 4:6, 7:10, 11L, 12L, 13:17, 18:22, 23:27, 28:32), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -9L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
-- 由于缺少数据,季度日期乱序
therapy_class `2020 Q4` `2021 Q1` `2021 Q2` `2020 Q3` `NA` `2021 Q3`
<ord> <chr> <chr> <chr> <chr> <int> <chr>
1 ALK Inhibitors "4 [1.58%]" "2 [0.72%]" "2 [0.78%]" "" NA ""
2 Anti-VEGF-based therapies "2 [0.79%]" "" "3 [1.17%]" "3 [0.90%]" NA ""
3 EGFR TKIs "1 [0.40%]" "1 [0.36%]" "1 [0.39%]" "1 [0.30%]" NA ""
4 EGFR-antibody based therapies "" "" "" "" NA ""
5 Non-platinum-based chemotherapy combinations "" "" "" "" NA ""
6 IO-based therapies "184 [72.73%]" "190 [68.35%]" "180 [70.31%]" "252 [75.22%]" NA "120 [76.43%]"
7 Platinum-based chemotherapy combinations "46 [18.18%]" "63 [22.66%]" "47 [18.36%]" "59 [17.61%]" NA "25 [15.92%]"
8 Single agent chemotherapies "4 [1.58%]" "1 [0.36%]" "2 [0.78%]" "4 [1.19%]" NA "1 [0.64%]"
9 Other "12 [4.74%]" "21 [7.55%]" "21 [8.20%]" "16 [4.78%]" NA "11 [7.01%]"
10 Total "253" "278" "256" "335" 0 "157"