1

我正在尝试使用RcppParrallel来估计两个 3D 矩阵的行之间的距离并返回一个新矩阵。我看到了使用Parallel Distance Matrix Calculation的例子parallelFor,但这些计算来自matrix一个固定大小的计算。

例如,假设我有两个矩阵amat,并且bmatnrow它们之间的 可能不同,但ncol总是3。在 R 中,这些可能看起来像:

set.seed(10); amat <- matrix(rnorm(9, 2, 0.5), ncol = 3)  
set.seed(50); bmat <- matrix(rnorm(9, 2, 0.5), ncol = 3)

使用这个例子,预期的输出是 amatrixnrow = amat.row()*bmat.row() = 95 列(1 的row索引amat,2:4 的rowbmat,5 矩阵行之间的欧几里德距离。像这样:

    points        X        Y        Z  distance
 [1,]      0 1.579198 1.136198 1.704544 0.7737024
 [2,]      0 2.274835 2.262075 2.180414 1.0006478
 [3,]      0 2.016499 1.861068 2.487795 1.1036122
 [4,]      1 2.274835 2.262075 2.180414 0.5282677
 [5,]      1 2.016499 1.861068 2.487795 0.7362889
 [6,]      1 1.579198 1.136198 1.704544 1.0692094
 [7,]      2 1.579198 1.136198 1.704544 1.2079720
 [8,]      2 2.274835 2.262075 2.180414 1.3836957
 [9,]      2 2.016499 1.861068 2.487795 1.5157243

这是我到目前为止的代码,在RcppParallel示例中得到启发

// [[Rcpp::depends(RcppParallel)]] 
#include <RcppParallel.h>
using namespace RcppParallel;

struct Mdistance : public Worker { //function object

  // input 3D-matrix
  const RMatrix<double> amat;
  const RMatrix<double> bmat;

  // output matrix to write to
  RMatrix<double> rmat;

  // initialize from Rcpp input and output matrixes
  Mdistance(const NumericMatrix amat, const NumericMatrix bmat, NumericMatrix rmat)
    : amat(amat), bmat(bmat), rmat(rmat) {}

  // function call operator that work for the specified range (begin/end) #Not sure of this part
  void operator()(std::size_t begin, std::size_t end) {
    for (std::size_t i = 0; i < amat.nrow(); i++) {
      for (std::size_t j = 0; j < bmat.nrow(); j++) {

        // write to output matrix
           rmat((i + (j * amat.nrow())), 0) = i + 1; //Row index of amat
           rmat((i + (j * amat.nrow())), 1) = bmat(j, 0); //Value of column 0 of bmat
           rmat((i + (j * amat.nrow())), 2) = bmat(j, 1); //Value of column 1 of bmat
           rmat((i + (j * amat.nrow())), 3) = bmat(j, 2); //Value of column 2 of bmat
           rmat((i + (j * amat.nrow())), 4) = sqrt((pow(bmat(j,0) - amat(i,0), 2.0) + pow(bmat(j, 1) - amat(i, 1), 2.0) + pow(bmat(j, 2) - amat(i, 2), 2.0))); //Euclidean distance between rows
      }
    }
  }
};

// [[Rcpp::export]]
NumericMatrix Mdistance_parallel(NumericMatrix amat, NumericMatrix bmat) {

  // allocate the matrix we will return
  NumericMatrix rmat((amat.nrow()*bmat.nrow()), 5);

  // create the worker
  Mdistance Mdistance(amat, bmat, rmat);

  // call it with parallelFor
  parallelFor(0, (amat.nrow()*bmat.nrow()), MDistance);

  return rmat;
}

知道如何使用RcppParallel使其工作吗?显然,我使用并行是因为nrow并且amat往往bmat接近 1000 万。我正在使用基于foreachin 的其他例程R。但是,它需要很长时间(> 1 天)并且似乎不稳定。

谢谢...

编辑

这是我仅使用 Rcpp 的示例

#include <Rcpp.h>
#include <cmath>
#include <algorithm>
using namespace Rcpp;

// [[Rcpp::export]]
NumericMatrix rcpp_distance(NumericMatrix amat, NumericMatrix bmat) {

  // allocate the matrix we will return
  NumericMatrix rmat((amat.nrow()*bmat.nrow()), 5);

  for (int i = 0; i < amat.nrow(); i++) {
    for (int j = 0; j < bmat.nrow(); j++) {

      rmat((i + (j * amat.nrow())), 0) = i + 1; //Row index of amat
      rmat((i + (j * amat.nrow())), 1) = bmat(j, 0); //Value of column 0 of bmat
      rmat((i + (j * amat.nrow())), 2) = bmat(j, 1); //Value of column 1 of bmat
      rmat((i + (j * amat.nrow())), 3) = bmat(j, 2); //Value of column 2 of bmat
      rmat((i + (j * amat.nrow())), 4) = sqrt((pow(bmat(j,0) - amat(i,0), 2.0) + pow(bmat(j, 1) - amat(i, 1), 2.0) + pow(bmat(j, 2) - amat(i, 2), 2.0))); //Euclidean distance between rows
    }
  }

  return rmat;
}
4

1 回答 1

0

我已经解决了。但是这是对的,为此分配的内存太大。

这是解决方案。

// [[Rcpp::depends(RcppParallel)]]
#include <Rcpp.h>
#include <RcppParallel.h>
using namespace Rcpp;
using namespace RcppParallel;

struct Mdistance : public Worker { //function object

  // input 3D-matrix
  const RMatrix<double> amat;
  const RMatrix<double> bmat;

  // output matrix to write to
  RMatrix<double> rmat;

  // initialize from Rcpp input and output matrixes
  Mdistance(const NumericMatrix amat, const NumericMatrix bmat, NumericMatrix rmat)
    : amat(amat), bmat(bmat), rmat(rmat) {}

  // function call operator that work for the specified range (begin/end) #Not sure of this part
  void operator()(std::size_t begin, std::size_t end) {
    for (std::size_t i = begin; i < end; i++) {

        for (std::size_t j = 0; j < bmat.nrow(); j++) {

          // write to output matrix
          rmat((i + (j * amat.nrow())), 0) = i + 1; //Row index of amat
          rmat((i + (j * amat.nrow())), 1) = bmat(j, 0); //Value of column 0 of bmat
          rmat((i + (j * amat.nrow())), 2) = bmat(j, 1); //Value of column 1 of bmat
          rmat((i + (j * amat.nrow())), 3) = bmat(j, 2); //Value of column 2 of bmat
          rmat((i + (j * amat.nrow())), 4) = sqrt((pow(bmat(j,0) - amat(i,0), 2.0) + pow(bmat(j, 1) - amat(i, 1), 2.0) + pow(bmat(j, 2) - amat(i, 2), 2.0))); //Euclidean distance between rows

        }
    }
  }
};

// [[Rcpp::export]]
NumericMatrix Mdistance_parallel(NumericMatrix amat, NumericMatrix bmat) {

  // allocate the matrix we will return
  NumericMatrix rmat((amat.nrow()*bmat.nrow()), 5);

  // create the worker
  Mdistance Mdistance(amat, bmat, rmat);

  // call it with parallelFor
  parallelFor(0, amat.nrow(), Mdistance);

  return rmat;
}

/*** R
set.seed(10); amat <- matrix(rnorm(9, 2, 0.5), ncol = 3)  
set.seed(50); bmat <- matrix(rnorm(9, 2, 0.5), ncol = 3)

Mdistance_parallel(amat, bmat)
*/
于 2020-01-06T19:14:33.940 回答