0

我是 R 新手,正在尝试使用 reshape2(acast)、ff 和 ffbase 包创建一个相当大的 R 对象(约 500 万行 × 约 1.7 万列)。做法是:先从 SQL Server 读入数据集并保存,对其执行 melt,然后分块 cast 成较小的数据框(显然,这个对象无法通过一次简单的 melt 加 dcast 直接生成)。所有块的列都相同。但当执行到 ffdfappend 时,R 崩溃了,Windows 提示"R for Windows GUI front-end 已停止工作"。目前我的主要问题是:我对 ffdfappend 的用法是否正确?

环境:Windows Server 2008(64 位),R 2.15.3(64 位)

我的“分块”代码:

library(RODBC);
library(reshape2);
library(ff);
library(ffbase);

# Open the ODBC channel used by the query below; `foo` is passed as the
# data source name.
db <- odbcConnect(foo)

# Aggregation function passed to acast() as fun.aggregate.
#
# Collapses all values that fall into one key/type cell to a single flag:
#   "N" - no usable value: x is empty, or its minimum is NA
#   "P" - the smallest value in the cell is 1
#   "S" - anything else
#
# x: vector of values belonging to one cell (may be empty).
# Returns a length-one character vector.
iSequence <- function(x) {
  # min() of an empty vector is Inf (with a warning), which would fall
  # through to "S"; an empty cell is reported as "N" explicitly instead.
  if (length(x) == 0) {
    return("N")
  }
  lowest <- min(x)
  if (is.na(lowest)) {
    return("N")
  }
  if (lowest == 1) "P" else "S"
}

# Fetch the key/type/val rows from SQL Server, leaving out rows whose val
# is NULL. Character columns are kept as strings rather than factors.
data.raw <- sqlQuery(
  db,
  "
SELECT 
key
,type
,val
FROM table
WHERE val IS NOT NULL
",
  stringsAsFactors = FALSE
)

# Reshape to long format, keeping key and type as identifier columns and
# val as the single measured column.
data.melt <- melt(data.raw, id.vars = c("key", "type"), measure.vars = "val")

# Unique first 4 characters of key - good enough granularity for a chunk
data.char <- unique(substr(data.melt$key, 1, 4))

# List that will hold one cast data frame per chunk. The slots are named
# after the chunk prefixes so that assigning by prefix fills the
# preallocated slots instead of appending new elements after them.
data.df.list <- setNames(vector("list", length(data.char)), data.char)

# Unique column names (types) across the whole data set
data.type.unique <- unique(data.melt$type)

# Chunk counter
chunk.count <- 1

# Cast the long data to wide format one key-prefix chunk at a time.
# The prefix of every row is computed once here rather than once per chunk.
key.prefix <- substr(data.melt$key, 1, 4)
for (i in data.char) {
  print(paste(chunk.count, '/', length(data.char)))
  # fill = "N" flags key/type combinations that are absent inside the chunk
  tempcast <- acast(
    data.melt[key.prefix == i, ],
    key ~ type,
    fun.aggregate = iSequence,
    fill = "N"
  )
  # Matrix with one column per known type, pre-filled with "N"
  templist <- matrix(
    data = "N",
    nrow = nrow(tempcast),
    ncol = length(data.type.unique),
    dimnames = list(rownames(tempcast), data.type.unique)
  )
  # Copy the cast values in by column name. Positional assignment would
  # misalign columns whenever the column order of tempcast differs from the
  # order of data.type.unique.
  templist[, colnames(tempcast)] <- tempcast
  # Store by position and label with the chunk prefix, so the preallocated
  # slots are filled and lookups such as data.df.list[["1174"]] still work.
  # Columns are converted to factors explicitly (the default in older R
  # versions); NOTE(review): ff is expected to need factors rather than
  # character columns - confirm against the ff documentation.
  data.df.list[[chunk.count]] <- as.data.frame(
    templist,
    stringsAsFactors = TRUE
  )
  names(data.df.list)[chunk.count] <- i
  rm(tempcast)
  rm(templist)
  gc()
  chunk.count <- chunk.count + 1
}
rm(key.prefix)

现在所有的块都已生成(数据看起来没有问题,都是有效的)。但当我在其中两个块上测试 ffdfappend 时(下面用到的键值都是有效的),R 崩溃了:

# Converting each chunk on its own works, starting from a NULL target.
# unsort() is applied because an error saying the data is sorted is raised
# otherwise.
t1 <- unsort(ffdfappend(NULL, data.df.list[["1174"]]))
t2 <- unsort(ffdfappend(NULL, data.df.list[["1175"]]))

# Appending one converted chunk to the other crashes R
t1 <- ffdfappend(t2, t1)

我正确使用 ffdfappend 吗?谢谢!

4

0 回答 0