0

我有一个包含 3 个变量的数据框:FishID、Taxa 和 EstimatedNumber。我正在寻找一种简单的方法来转换这些数据,以便为 NMDS 做好准备。本质上,我希望 FishID 保留为第一列,其后的每一列对应因子 Taxa 的一个水平(即每个分类单元一列),并用 EstimatedNumber 中的值填充矩阵中相应的单元格。

这是我的数据的一个子集。

# dput()-style literal of a 20-row data.frame with three columns:
#   FishID          - factor (71 levels; only a few are used in these rows)
#   Taxa            - character vector of taxon names
#   EstimatedNumber - integer counts, some of which are NA
# Note: the same FishID/Taxa pair can occur more than once
# (e.g. level 55 with "Gammarus" appears twice).
structure(list(FishID = structure(c(50L, 50L, 51L, 52L, 52L, 
55L, 55L, 55L, 55L, 55L, 56L, 56L, 67L, 67L, 67L, 70L, 70L, 65L, 
65L, 71L), .Label = c("SSM002", "SSM004", "SSM005A", "SSM005B", 
"SSM006", "SSM007", "SSM009", "SSM012", "SSM013", "SSM014", "SSM016", 
"SSM017", "SSM018", "SSM019", "SSM020", "SSM021", "SSM022", "SSM023", 
"SSM024A", "SSM024B", "SSM025", "SSM026", "SSM027", "SSM030", 
"SSM031", "SSM032", "SSM033", "SSM034", "SSM035", "SSM036", "SSM037", 
"SSM038", "SSM039", "SSM040", "SSM041", "SSM043", "SSM044", "SSM045", 
"SSM046", "SSM047", "SSM048", "SSM052", "SSM053", "SSM054", "SSM055", 
"SSM056", "SSM057", "SSM058", "SSM059", "SSS001", "SSS002", "SSS003", 
"SSS004", "SSS005", "SSS006", "SSS007", "SSS008", "SSS009", "SSS010", 
"SSS011", "SSS012", "SSS013", "SSS014", "SSS015", "SSS016", "SSS017A", 
"SSS017B", "SSS018", "SSS019", "SSS020", "SSS022"), class = "factor"), 
    Taxa = c("Onisimus", "Gammarus", "Unidentified", "Fish", 
    "Amphipods", "Gammarus", "Onisimus", "Gammarus", "Jellyfish", 
    "Unidentified", "Onisimus", "Unidentified", "Onisimus", "Unidentified", 
    "Gammarus", "Onisimus", "Fish", "Onisimus", "Jellyfish", 
    "Fish"), EstimatedNumber = c(1305L, 103L, NA, 1L, NA, 3L, 
    4L, 4L, 1L, NA, 32L, NA, 45L, NA, 1L, 1122L, 12L, 3L, 8L, 
    8L)), row.names = c(NA, 20L), class = "data.frame")

这是我正在寻找的一个例子。

   FishID Onisimus Gammarus
1  SSS001     1305      103
2  SSS002        0        0
3  SSS003        0        0
4  SSS006        4        3
5  SSS007       32        0
6 SSS017B       45        1
4

2 回答 2

1

使用包reshape2

# Cast from long to wide: one row per FishID, one column per Taxa value,
# cells filled from EstimatedNumber.
# - Duplicate FishID x Taxa pairs are collapsed by summing.
# - NA counts are dropped inside the sum (na.rm = TRUE); a plain `sum` would
#   turn every cell that contains an NA into NA, which NMDS cannot handle.
# - FishID x Taxa pairs that never occur are filled with the aggregate of an
#   empty vector, i.e. 0.
df_reshaped <- reshape2::dcast(
  df,
  FishID ~ Taxa,
  value.var = "EstimatedNumber",
  fun.aggregate = function(x) sum(x, na.rm = TRUE)
)

请注意,您的数据中 SSS006 × Gammarus 这一组合出现了两次(因此需要聚合函数),并且 EstimatedNumber 中含有 NA。

于 2020-05-05T16:40:48.053 回答
1

您可以使用 tidyverse(tidyr 包)中的 pivot_wider() 来执行此操作。

library(tidyverse)

# Reshape the long table into a FishID x Taxa abundance table for NMDS.
df_reshaped <- df %>%
  # Change from long to wide format: one row per FishID, one column per taxon
  pivot_wider(
    id_cols = FishID,
    names_from = Taxa,
    values_from = EstimatedNumber,
    # Sum the entries where there is more than 1 value in EstimatedNumber,
    # ignoring NA counts so a single NA does not blank out the whole cell
    values_fn = list(EstimatedNumber = function(x) sum(x, na.rm = TRUE)),
    # FishID x Taxa combinations that never occur become 0 instead of NA
    values_fill = list(EstimatedNumber = 0L)
  )

#   FishID  Onisimus Gammarus Unidentified  Fish Amphipods Jellyfish
#   <fct>      <int>    <int>        <int> <int>     <int>     <int>
# 1 SSS001      1305      103            0     0         0         0
# 2 SSS002         0        0            0     0         0         0
# 3 SSS003         0        0            0     1         0         0
# 4 SSS006         4        7            0     0         0         1
# 5 SSS007        32        0            0     0         0         0
# 6 SSS017B       45        1            0     0         0         0
# 7 SSS020      1122        0            0    12         0         0
# 8 SSS016         3        0            0     0         0         8
# 9 SSS022         0        0            0     8         0         0
于 2020-05-05T16:53:33.173 回答