0

我的 dcast Rcode 不再运行。我有这里讨论的问题:segfault in R using reshape2 package and dcast

该错误尚未修复,因此我正在寻找其他方法来实现我的 dcast 输出。任何建议将不胜感激!

下面是我的数据集的一个很小的样本。基本上,每个调查 ID(“EID”)对每个物种都有一条记录。我想让每个调查 ID(“EID”)只占一行,所有物种作为列,单元格为其对应的值(“值”),即宽格式。

> dput(sample)
structure(list(EID = c("L00155/69/2000-09-06", "Q99107/178/1999-08-23", 
"G02192/1/2002-07-08", "G97158/1/1997-10-26", "Q06091/2/2006-07-04", 
"L00004/171/2000-03-01", "G11094/15/2011-09-05", "Q04127/16/2004-07-28", 
"Q02122/230/2002-10-29", "G08002/6/2008-02-03", "Q99006/143/1999-02-17", 
"Q08053/3/2008-06-12", "Q99128/22/1999-08-19", "L00177/83/2000-12-18", 
"Q05122/11/2005-08-30", "Q04156/44/2004-10-29", "L01097/69/2001-06-26", 
"G08004/169/2008-05-14", "Q03041/26/2003-06-14", "G98115/60/1998-09-11", 
"G00002/20/2000-01-17", "G00002/20/2000-01-17", "G00054/1/2000-05-31", 
"G00054/1/2000-05-31"), tspp.name = structure(c(13L, 13L, 13L, 
13L, 16L, 13L, 13L, 4L, 13L, 13L, 13L, 13L, 13L, 11L, 4L, 13L, 
13L, 13L, 13L, 20L, 13L, 13L, 24L, 24L), .Label = c("American plaice", 
"American sand lance", "Arctic cod", "Atlantic cod", "Atlantic halibut", 
"Atlantic herring", "Bigeye tuna", "Black dogfish", "Bluefin tuna", 
"Capelin", "Greenland halibut", "Lookdown", "Northern shrimp", 
"Ocean quahog", "Porbeagle", "Redfishes", "Slenteye headlightfish", 
"Smooth flounder", "Spiny dogfish", "Striped pink shrimp", "Summer flounder", 
"White hake", "Winter flounder", "Witch flounder", "Yellowtail flounder"
), class = "factor"), elasmo.name = structure(c(26L, 30L, 30L, 
30L, 30L, 25L, 21L, 30L, 30L, 30L, 30L, 21L, 30L, 5L, 30L, 30L, 
30L, 21L, 30L, 30L, 14L, 21L, 24L, 21L), .Label = c("Arctic skate", 
"Atlantic sharpnose shark", "Barndoor skate", "Basking shark", 
"Black dogfish", "Blue shark", "Deepsea cat shark", "Greenland shark", 
"Jensen's skate", "Little skate", "Manta", "Ocean quahog", "Oceanic whitetip shark", 
"Porbeagle", "Portuguese shark", "Rough sagre", "Roughtail stingray", 
"Round skate", "Sharks", "Shortfin mako", "Skates", "Smooth skate", 
"Soft skate", "Spiny dogfish", "Spinytail skate", "Thorny skate", 
"White shark", "White skate", "Winter skate", "NA"), class = "factor"), 
    elasmo.discard = c(1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 
    25, 0, 0, 0, 1, 0, 0, 1, 1, 15, 25)), .Names = c("EID", "tspp.name", 
"elasmo.name", "elasmo.discard"), class = "data.frame", row.names = c("18496", 
"488791", "87549", "236671", "139268", "15606", "11132", "115531", 
"93441", "159675", "403751", "42587", "485941", "19285", "130395", 
"119974", "73826", "7953", "99124", "351461", "71", "72", "184", 
"185"))

最后,我希望得到这个:

# dcast() is exported by reshape2 (not plyr), so reshape2 must be attached.
library(reshape2)

# Cast to wide format: `...` stands for every column not otherwise used in the
# formula, each elasmo.name level becomes a column, and the elasmo.discard
# values falling into the same cell are summed.
test <- dcast(sample, ... ~ elasmo.name,
              value.var = "elasmo.discard",
              fun.aggregate = sum)
test

请注意,“dcast”代码在这里有效,但是当我在具有 145349 行的整个数据集上运行它时,我确实遇到了致命错误。

非常感谢!!

4

2 回答 2

1

这是 Hadley 的包出现之前的做法:先用 aggregate 求和,然后用 reshape 重塑为宽格式。

# Base-R alternative to dcast(): sum the value column per group first, then
# spread the groups into columns.
# NOTE(review): `d` is not defined in this snippet; presumably it is the
# question's data frame with columns EID, tspp.name, elasmo.name and
# elasmo.discard in that order -- confirm before running.
foo <- aggregate(
  x = d[, 4, drop = FALSE],  # value column, kept as a one-column data frame
  by = d[, 1:3],             # grouping columns
  FUN = sum
)
# One row per (EID, tspp.name); one elasmo.discard.<level> column per
# elasmo.name value. The result is printed, not assigned.
reshape(
  foo,
  direction = "wide",
  idvar = c("EID", "tspp.name"),
  timevar = "elasmo.name",
  v.names = "elasmo.discard"
)

如果第一部分较慢,可以减少 “by” 部分中的列;看起来 tspp.name 是由 EID 决定的,如果确实如此,就不要按它聚合,而是在聚合之后再把它加回去。

如果第二部分很慢,不妨试试这里的方法之一:https://stackoverflow.com/a/9617424/210673 。

为了便于进一步提速,请提供一个可用于测试代码的适当规模的示例(例如用 sample 或 rep 生成)。运行速度通常取决于各变量有多少种不同的组合。

于 2013-10-08T20:35:33.283 回答
0

我无法重现该错误,请参阅下面的代码。我把 sample 的行数增加到了 196608。

可能是 sample$elasmo.name 中类别的数量在起作用。

library(reshape2)

# Example data copied from the question's dput() output: a data frame with
# columns EID (character), tspp.name and elasmo.name (factors) and
# elasmo.discard (numeric).
# Note: the last elasmo.name level is the literal string "NA", not a missing
# value. `sample` is also the name of a base R function; the name is kept as
# in the question.
sample <- structure(list(EID = c("L00155/69/2000-09-06", "Q99107/178/1999-08-23", 
  "G02192/1/2002-07-08", "G97158/1/1997-10-26", "Q06091/2/2006-07-04", 
  "L00004/171/2000-03-01", "G11094/15/2011-09-05", "Q04127/16/2004-07-28", 
  "Q02122/230/2002-10-29", "G08002/6/2008-02-03", "Q99006/143/1999-02-17", 
  "Q08053/3/2008-06-12", "Q99128/22/1999-08-19", "L00177/83/2000-12-18", 
  "Q05122/11/2005-08-30", "Q04156/44/2004-10-29", "L01097/69/2001-06-26", 
  "G08004/169/2008-05-14", "Q03041/26/2003-06-14", "G98115/60/1998-09-11", 
  "G00002/20/2000-01-17", "G00002/20/2000-01-17", "G00054/1/2000-05-31", 
  "G00054/1/2000-05-31"), tspp.name = structure(c(13L, 13L, 13L, 
  13L, 16L, 13L, 13L, 4L, 13L, 13L, 13L, 13L, 13L, 11L, 4L, 13L, 
  13L, 13L, 13L, 20L, 13L, 13L, 24L, 24L), .Label = c("American plaice", 
  "American sand lance", "Arctic cod", "Atlantic cod", "Atlantic halibut", 
  "Atlantic herring", "Bigeye tuna", "Black dogfish", "Bluefin tuna", 
  "Capelin", "Greenland halibut", "Lookdown", "Northern shrimp", 
  "Ocean quahog", "Porbeagle", "Redfishes", "Slenteye headlightfish", 
  "Smooth flounder", "Spiny dogfish", "Striped pink shrimp", "Summer flounder", 
  "White hake", "Winter flounder", "Witch flounder", "Yellowtail flounder"
  ), class = "factor"), elasmo.name = structure(c(26L, 30L, 30L, 
  30L, 30L, 25L, 21L, 30L, 30L, 30L, 30L, 21L, 30L, 5L, 30L, 30L, 
  30L, 21L, 30L, 30L, 14L, 21L, 24L, 21L), .Label = c("Arctic skate", 
  "Atlantic sharpnose shark", "Barndoor skate", "Basking shark", 
  "Black dogfish", "Blue shark", "Deepsea cat shark", "Greenland shark", 
  "Jensen's skate", "Little skate", "Manta", "Ocean quahog", "Oceanic whitetip shark", 
  "Porbeagle", "Portuguese shark", "Rough sagre", "Roughtail stingray", 
  "Round skate", "Sharks", "Shortfin mako", "Skates", "Smooth skate", 
  "Soft skate", "Spiny dogfish", "Spinytail skate", "Thorny skate", 
  "White shark", "White skate", "Winter skate", "NA"), class = "factor"), 
      elasmo.discard = c(1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 
      25, 0, 0, 0, 1, 0, 0, 1, 1, 15, 25)), .Names = c("EID", "tspp.name", 
  "elasmo.name", "elasmo.discard"), class = "data.frame", row.names = c("18496", 
  "488791", "87549", "236671", "139268", "15606", "11132", "115531", 
  "93441", "159675", "403751", "42587", "485941", "19285", "130395", 
  "119974", "73826", "7953", "99124", "351461", "71", "72", "184", 
  "185"))

# Scale the small example up to at least the size of the real data set
# (145349 rows) to try to reproduce the crash.
n <- nrow(sample)
N <- 145349
# Number of doublings needed so that n * 2^p >= N. Clamped at 0 so the
# replication below stays well defined if the data is already large enough
# (the previous `1:p` loop would have iterated over c(1, 0) in that case).
p <- max(0, ceiling(log2(N / n)))
n * 2^p
n * 2^p > N

# Replicate every row 2^p times in one vectorised step instead of growing the
# data frame with rbind() inside a loop. Row order matches the doubling
# approach; only the automatically generated row names may differ.
sample <- sample[rep(seq_len(n), times = 2^p), ]

# Sanity checks on the enlarged data.
nrow(sample)

class(sample)
head(sample)

# Level counts, including unused factor levels (first call) and only the
# levels actually present (second call).
table(sample$elasmo.name)
table(as.character(sample$elasmo.name))

# Cast to wide format: one column per elasmo.name level, summing
# elasmo.discard within each cell.
test <- dcast(sample, ... ~ elasmo.name,
              value.var = "elasmo.discard",
              fun.aggregate = sum)
head(test)
于 2013-10-09T08:46:24.397 回答