每当我用 apply 语句替换 for 循环时,我的 R 脚本运行得更快,但这里有一个例外。我在正确使用 apply 系列方面仍然缺乏经验,那么我可以对 apply 语句做些什么来超越(即变得更快)比 for 循环?
示例数据:
vc<-as.character(c("120,129,129,114","103,67,67,67,67,10,10,10,12","2,1,1,1,2,4,3,1,1,1,3,2,1,1","1,3,1,1,1,1,1,4",NA,"5","1,1,99","2,2,2,16,11,11,11,11,11,29,29,26,26,26,26,26,26,26,26,26,26,31,24,29,29,29,29,40,24,23,3,3,3,6,6,4,5,4,4,3,3,4,4,6,8,8,6,6,6,5,3,3,4,4,5,5,4,4,4,4,6,11,10,11,10,14,2,2,22,22,22,22,24,24,24,23,24,24,24,23,24,23,23,23,24,25,27,27,24,24,26,24,25,25,24,25,26,29,31,32,32,32,32,33,32,35,35,35,52,44,37,26","20,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,19,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,19,19,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,1,1,1,12,10","67,63,73,70,75,135,94,94,96,94,95,96,96,97,94,94,94,94,24,24,24,24,24,24,24,24,24,24,24,1,1,1"))
目标是填充一个数字矩阵 m.res,其中每一行包含 vc 中每个元素的前 3 个值。这是for循环:
fx.test1
function(vc)
{
m.res<-matrix(ncol=3, nrow=length(vc))
for (j in 1:length(vc))
{vn<-as.numeric(unlist(strsplit(vc[j], split=",")))
vn[is.na(vn)]<-0; vn2<-rev(sort(vn))
m.res[j,]<-vn2[1:3]
}
}
下面是我的“应用解决方案”。为什么它更慢?我怎样才能让它更快?谢谢!
fx.test2
function(vc)
{
m.res<-matrix(ncol=3, nrow=length(vc))
vc[is.na(vc)]<-"0"
ls.vc<-sapply(vc, function(x) tail(sort(as.numeric(unlist(strsplit(x, split=",")))),3), simplify=TRUE)
#names(ls.vc)<-seq(1:length(vc))
ls.vc2<-lapply(ls.vc, function(x) c(as.numeric(x), rep(0, times = 3 - length(x))))
m.res<-as.matrix(t(as.data.frame(ls.vc)))
return(m.res)
}
system.time(m.res<-fx.test1(vc))
# user system elapsed
# 0.001 0.000 0.001
system.time(m.res<-fx.test2(vc))
# user system elapsed
# 0.003 0.000 0.003
更新:我遵循@John 的建议并生成了两个经过修剪且真正等效的函数。事实上,我能够稍微加快 lapply 函数的速度,但它仍然比 for 循环慢。如果您碰巧对如何优化这些功能以提高速度有任何想法,请告诉我。谢谢你们。
fx.test3<-function(vc)
{
L<-strsplit(vc,split=",")
m.res<-matrix(ncol=3, nrow=length(vc))
for (j in 1:length(vc))
{
m.res[j,]<-sort(c(as.numeric(L[[j]]),rep(0,3)), decreasing=TRUE)[1:3]
}
return(m.res)
}
fx.test4<-function(vc)
{
L<-strsplit(vc, split=",")
D<-t(as.data.frame(lapply(L, function(X) {sort(c(as.numeric(X),rep(0,3)),decreasing=TRUE)[1:3]})))
row.names(D)<-NULL
m.res<-as.matrix(D)
return(m.res)
}
system.time(fx.test3(vc))
# user system elapsed
# 0.001 0.000 0.001
system.time(fx.test4(vc))
# user system elapsed
# 0.002 0.000 0.002