我正在使用 data.table 包,想知道下面代码中用到的向量扫描(vector scan)有哪些替代方案。
# Small example table: 30 rows, a two-level character column (v2) and an
# integer column sampled from 1..30 (v3). No seed is set, so v2 and v3 differ
# between runs.
x <- data.table(
  v1 = 1:30,
  v2 = sample(c("a", "b"), 30, replace = TRUE),
  v3 = sample(1:30, 30, replace = TRUE)
)

# Vector scan: build a full-length logical mask from the columns and keep the
# rows where it is TRUE ("a" rows with v3 > 10, or "b" rows with v3 < 20).
x[
  (x$v2 == "a" & x$v3 > 10) |
    (x$v2 == "b" & x$v3 < 20),
]
谢谢你,Z
感谢各位的回复:我把这些方法都试了一遍,但没有看到明显的差异。以下是我另外做的几种尝试及其耗时:
# Benchmark table: 30 million rows, a two-level character column (v2) and an
# integer column sampled from 1..30 (v3). No seed is set.
x <- data.table(
  v1 = 1:30000000,
  v2 = sample(c("a", "b"), 30000000, replace = TRUE),
  v3 = sample(1:30, 30000000, replace = TRUE)
)

# Each attempt below is timed by taking proc.time() before the expression and
# subtracting it afterwards. The trailing number on each timing line is the
# value the author recorded.

# Attempt 1: vector scan, columns referenced through `x$`.
ptm <- proc.time()
x[
  (x$v2 == "a" & x$v3 > 10) |
    (x$v2 == "b" & x$v3 < 20),
]
proc.time() - ptm # 4.37

# Attempt 2: same mask, columns referenced by bare name inside `[`.
ptm <- proc.time()
x[
  (v2 == "a" & v3 > 10) |
    (v2 == "b" & v3 < 20),
]
proc.time() - ptm # 4.29

# Attempt 3: filter each branch separately, then stack the pieces with rbind().
ptm <- proc.time()
rbind(
  x[(v2 == "a" & v3 > 10)],
  x[(v2 == "b" & v3 < 20)]
)
proc.time() - ptm # 5.03

# Attempt 4: same as attempt 3, after keying the table on v2.
setkey(x, "v2")
ptm <- proc.time()
rbind(
  x[(v2 == "a" & v3 > 10)],
  x[(v2 == "b" & v3 < 20)]
)
proc.time() - ptm # 4.35 (not incl. time of setkey)

# Attempt 5: same two filters, stacked with rbindlist() instead of rbind().
# x is already keyed on v2 at this point, so this setkey() leaves the key
# unchanged.
setkey(x, "v2")
ptm <- proc.time()
rbindlist(
  list(
    x[(v2 == "a" & v3 > 10)],
    x[(v2 == "b" & v3 < 20)]
  )
)
proc.time() - ptm # 3.87 (not incl. time of setkey)

# Attempt 6: key on (v2, v3), select both v2 values through a keyed join with
# J(), then apply the mask to the joined result. Only dim() of the result is
# requested here rather than the rows themselves.
setkey(x, "v2", "v3")
ptm <- proc.time()
dim(
  x[J(c("a", "b"))][
    (v2 == "a" & v3 > 10) |
      (v2 == "b" & v3 < 20)
  ]
)
proc.time() - ptm #4.16 (not incl time of setkey)