我正在尝试将我的一些 R 代码移植到 Julia;基本上我在 Julia 中重写了以下 R 代码:
library(parallel)
eps_1<-rnorm(1000000)
eps_2<-rnorm(1000000)
large_matrix<-ifelse(cbind(eps_1,eps_2)>0,1,0)
matrix_to_compare = expand.grid(c(0,1),c(0,1))
indices<-seq(1,1000000,4)
large_matrix<-lapply(indices,function(i)(large_matrix[i:(i+3),]))
function_compare<-function(x){
which((rowSums(x==matrix_to_compare)==2) %in% TRUE)
}
> system.time(lapply(large_matrix,function_compare))
user system elapsed
38.812 0.024 38.828
> system.time(mclapply(large_matrix,function_compare,mc.cores=11))
user system elapsed
63.128 1.648 6.108
可以注意到,从一个核心升级到 11 个核心时,我的速度得到了显着提升。现在我正在尝试在 Julia 中做同样的事情:
#Define cluster:
addprocs(11);
using Distributions;
@everywhere using Iterators;
d = Normal();
eps_1 = rand(d,1000000);
eps_2 = rand(d,1000000);
#Create a large matrix:
large_matrix = hcat(eps_1,eps_2).>=0;
indices = collect(1:4:1000000)
#Split large matrix:
large_matrix = [large_matrix[i:(i+3),:] for i in indices];
#Define the function to apply:
@everywhere function function_split(x)
matrix_to_compare = transpose(reinterpret(Int,collect(product([0,1],[0,1])),(2,4)));
matrix_to_compare = matrix_to_compare.>0;
find(sum(x.==matrix_to_compare,2).==2)
end
@time map(function_split,large_matrix )
@time pmap(function_split,large_matrix )
5.167820 seconds (22.00 M allocations: 2.899 GB, 12.83% gc time)
18.569198 seconds (40.34 M allocations: 2.082 GB, 5.71% gc time)
正如人们注意到的那样,我没有得到任何 pmap 的加速。也许有人可以提出替代方案。