1 最简单的方法是使用 base R。
# Cross-tabulate the first column of df1 (rows) against the second (columns).
tbl <- table(df1[[1]], df1[[2]])
# Row percentages: prop.table() with margin = 1 divides every cell by its row
# total, which avoids relying on implicit column-major recycling of
# rowSums(tbl) against the table.
100 * prop.table(tbl, margin = 1)
# Code1 Code2 Code3 Code4 Code5
# Dept. 1 100.00000 0.00000 0.00000 0.00000 0.00000
# Dept. 2 0.00000 100.00000 0.00000 0.00000 0.00000
# Dept. 3 0.00000 0.00000 66.66667 33.33333 0.00000
# Dept. 4 0.00000 0.00000 0.00000 40.00000 60.00000
另一种 base R 的写法是使用 xtabs。
# Contingency table of Department (rows) by Code (columns), built from the
# named columns of df1 through the formula interface.
xtb <- xtabs(~ Department + Code, data = df1)
# Scale the counts by 100, then divide each row by its own total to obtain
# row-wise percentages.
sweep(100 * xtb, MARGIN = 1, STATS = rowSums(xtb), FUN = "/")
2 以下是使用 dplyr 和 tidyr 的解决方案。
library(dplyr)
library(tidyr)
# Share of each Code within its Department, reshaped to one row per
# Department and one column per Code. Department/Code combinations that do
# not occur in df1 are left as NA by pivot_wider().
df1 %>%
  # One row per observed (Department, Code) pair, with its count in `n`.
  count(Department, Code) %>%
  group_by(Department) %>%
  # Count of the pair divided by the total count of its Department.
  mutate(Perc = n / sum(n)) %>%
  ungroup() %>%
  pivot_wider(
    id_cols = Department,
    names_from = Code,
    values_from = Perc
  )
## A tibble: 4 x 6
# Department Code1 Code2 Code3 Code4 Code5
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 Dept. 1 1 NA NA NA NA
#2 Dept. 2 NA 1 NA NA NA
#3 Dept. 3 NA NA 0.667 0.333 NA
#4 Dept. 4 NA NA NA 0.4 0.6
要得到百分比值,并在出现 NA 的位置填入零,只需做两处简单的改动:将比例乘以 100,并在 pivot_wider() 中设置 values_fill = 0。
# Percentage of each Code within its Department, reshaped to one row per
# Department and one column per Code. Department/Code combinations that do
# not occur in df1 are filled with 0 instead of NA.
df1 %>%
  # One row per observed (Department, Code) pair, with its count in `n`.
  count(Department, Code) %>%
  group_by(Department) %>%
  # Pair count as a percentage of the Department's total count.
  mutate(Perc = 100 * n / sum(n)) %>%
  ungroup() %>%
  pivot_wider(
    id_cols = Department,
    names_from = Code,
    values_from = Perc,
    values_fill = 0
  )
数据
# Example data: ten observations, each pairing a department with a code.
# Both columns are kept as character vectors (not factors).
df1 <- data.frame(
  Department = rep(
    c("Dept. 1", "Dept. 2", "Dept. 3", "Dept. 4"),
    times = c(1L, 1L, 3L, 5L)
  ),
  Code = rep(
    c("Code1", "Code2", "Code3", "Code4", "Code5"),
    times = c(1L, 1L, 2L, 3L, 3L)
  ),
  stringsAsFactors = FALSE
)