0

我希望根据多个变量从计数数据中计算比例。在下面的示例数据集中,我想知道在每个日期、每个物种中,各个分数(score)所占的比例。例如,对于 2019-09-16 的所有 MCAP,分数为 2 的记录所占的比例是多少?

# Example data (looks like dput() output): a data.frame with one row per
# observation and three columns -- `date` (class "Date"), `species`
# (character code) and `score` (numeric).
structure(list(date = structure(c(18155, 18155, 18155, 18155, 
18155, 18155, 18155, 18155, 18171, 18171, 18171, 18185, 18185, 
18185, 18185, 18185, 18185, 18185, 18185, 18185), class = "Date"), 
species = c("MCAP", "MCAP", "PCOM", "MCAP", "MCAP", "MCAP", 
"PCOM", "PCOM", "PCOM", "PCOM", "PCOM", "MCAP", "MCAP", "MCAP", 
"MCAP", "PCOM", "PCOM", "PCOM", "PCOM", "PCOM"), score = c(2, 
2, 2, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3)), row.names = c(1L, 
2L, 3L, 4L, 8L, 33L, 37L, 38L, 7912L, 7931L, 7947L, 8543L, 8544L, 
8545L, 8547L, 12946L, 12947L, 12948L, 12949L, 12950L), class = "data.frame")
4

2 回答 2

2

tidyverse,特别是 dplyr 包可以帮助解决这个问题。我认为有很多方法可以使用这些包来解决这个问题,但这是我想到的第一个。

library(tidyverse)

# Example data: 20 scored observations, one row each.
data <- structure(
  list(
    # Dates are stored as day counts since 1970-01-01:
    # 18155 = 2019-09-16, 18171 = 2019-10-02, 18185 = 2019-10-16.
    date = structure(
      rep(c(18155, 18171, 18185), times = c(8L, 3L, 9L)),
      class = "Date"
    ),
    species = c(
      "MCAP", "MCAP", "PCOM", "MCAP", "MCAP", "MCAP", "PCOM", "PCOM",
      "PCOM", "PCOM", "PCOM",
      "MCAP", "MCAP", "MCAP", "MCAP", "PCOM", "PCOM", "PCOM", "PCOM", "PCOM"
    ),
    score = c(
      2, 2, 2, 3, 3, 2, 3, 3,
      3, 2, 3,
      3, 3, 2, 2, 3, 3, 3, 3, 3
    )
  ),
  # Original (non-consecutive) row labels are kept as integer row names.
  row.names = c(
    1L, 2L, 3L, 4L, 8L, 33L, 37L, 38L,
    7912L, 7931L, 7947L,
    8543L, 8544L, 8545L, 8547L, 12946L, 12947L, 12948L, 12949L, 12950L
  ),
  class = "data.frame"
)

# Proportion of each score within every species/date combination.
# Result columns: species, date, score, n (row count) and prop (n divided by
# the total n of the same species and date).
data_sum <- data %>%
  # Count the rows for each species/date/score combination (column `n`).
  count(species, date, score) %>%
  # The denominator is the total count per species and date, so group
  # without `score` before dividing.
  group_by(species, date) %>%
  mutate(prop = n / sum(n)) %>%
  # Drop the grouping so later verbs on `data_sum` act on the whole table
  # instead of silently working per species/date group.
  ungroup()

# Look up the score proportions for species MCAP on 2019-09-16.
data_sum %>%
  filter(
    species == "MCAP",
    date == as.Date("2019-09-16")
  )

  species date       score     n  prop
  <chr>   <date>     <dbl> <int> <dbl>
1 MCAP    2019-09-16     2     3   0.6
2 MCAP    2019-09-16     3     2   0.4
于 2020-04-16T14:20:51.983 回答
1

在基础 R 中,我们可以使用 table 和 prop.table 来做到这一点。您可以调整 margin 参数来改变计算比例时所用的分母。代码很短,结果在控制台中以表格形式显示时也比较直观,但它并不理想,因为结果不是数据框,不便于并入后续的其他分析。如果需要数据框,我建议采用另一个答案中的 dplyr 方法。

# Cross-tabulate species x score x date, then divide each cell by the total
# of its species/date combination (margins 1 and 3), so the scores of one
# species on one date sum to 1.
prop.table(
  table(d[c("species", "score", "date")]),
  margin = c(1, 3)
)
# , , date = 2019-09-16
# 
#        score
# species         2         3
#    MCAP 0.6000000 0.4000000
#    PCOM 0.3333333 0.6666667
# 
# , , date = 2019-10-02
# 
#        score
# species         2         3
#    MCAP                    
#    PCOM 0.3333333 0.6666667
# 
# , , date = 2019-10-16
# 
#        score
# species         2         3
#    MCAP 0.5000000 0.5000000
#    PCOM 0.0000000 1.0000000
于 2020-04-16T14:24:39.087 回答