

// Index range shared by both matrix dimensions. `n` is not declared in this
// snippet; it is assumed to be defined elsewhere (e.g. as a `config const`).
// A range is used rather than a one-dimensional domain `{1..n}` because a
// rectangular domain literal `{r1, r2}` is built from one range per dimension.
var idx = 1..n;
var adom = {idx, idx};      // 2-D rectangular domain: idx x idx
var A: [adom] int;          // square integer matrix over adom
// populate A

var rowsums: [idx] int;     // one slot per row of A for that row's sum



2 个回答



// Problem size. Declared as a `config const` so it has a value here and can be
// overridden at execution time. Using a "naked", undeclared `n` makes
// compilation fail:
//   tio.chpl:1: error: 'n' undeclared (first use this function)
config const n = 8;
const indices = 1..n;
const adom = {indices, indices};   // 2-D rectangular domain: indices x indices
var A: [adom] int;

// Populate A so that A[i, j] = i*j
[(i,j) in adom] A[i, j] = i*j;

var rowsums: [indices] int;

// For every row i, slice that row out of A and sum it with a `+` reduction.
forall i in indices {
  rowsums[i] = + reduce(A[i, ..]);
}



这里利用了对数组 `A` 的切片执行 `+ reduce`(求和归约)。

请注意,`forall` 和 `+ reduce` 都会在上面的程序中引入并行性。如果 `indices` 的大小足够小,仅使用 `for` 循环可能会更高效,从而避免产生任务的开销。

于 2017-08-16T23:12:31.280 回答






有关确切的运行时性能记录,请参见代码中自带说明的结果表格;也欢迎访问在线 IDE 的实时运行环境(上文提到的 TiO)并亲自进行实验。

读者还可能注意到,小规模实验中存在外部噪声:与操作系统(O/S)和托管 IDE 相关的进程会在 CPU、各级缓存(Lx-CACHE)、内存 I/O、进程调度等方面产生不利的资源争用,从而干扰资源使用并影响运行时性能。因此,这些测量值不宜用于任何一般性的结论。


/* Benchmark: compute the row sums of a square integer matrix twice, first
   with a serial `for` loop ([SEQ]) and then with a data-parallel `forall`
   loop ([PAR]), timing each <SECTION-UNDER-TEST> with its own Timer and
   printing both elapsed times in microseconds. */

/* ---------------------------------------SETUP-SECTION-UNDER-TEST--*/ use Time;
/* ---------------------------------------SETUP-SECTION-UNDER-TEST--*/ var aStopWATCH_SEQ: Timer;   // times the [SEQ] loop only
/* ---------------------------------------SETUP-SECTION-UNDER-TEST--*/ var aStopWATCH_PAR: Timer;   // times the [PAR] loop only

// Problem size. The commented-out alternatives record what happened on the
// <TiO>-IDE when other sizes were tried.
// const max_idx =    123456;                // seems to be too fat  for <TiO>-IDE to allocate                  <TiO>--   /wrappers/chapel: line 6: 24467 Killed
const max_idx =      4096;
// const max_idx =      8192;                // seems to be too long for <TiO>-IDE to let it run [SEQ] part     <TiO>--  The request exceeded the 60 second time limit and was terminated
// const max_idx =     16384;                // seems to be too long for <TiO>-IDE to let it run [PAR] part too <TiO>--   /wrappers/chapel: line 6: 12043 Killed
const indices = 1..max_idx;

const   adom  = {indices, indices};          // 2-D rectangular domain: indices x indices
var A: [adom] int;

[(i,j) in adom] A[i, j] = i*j;               // Populate A[,] so that A[i, j] = i*j

var rowsums: [indices] int;

/* ---------------------------------------------SECTION-UNDER-TEST--*/ aStopWATCH_SEQ.start();
for       i in indices {                     // SECTION-UNDER-TEST-- serial loop over rows
  rowsums[i] = + reduce(A[i, ..]);           // SECTION-UNDER-TEST-- sum the slice holding row i
}                                            // SECTION-UNDER-TEST--
/* ---------------------------------------------SECTION-UNDER-TEST--*/ aStopWATCH_SEQ.stop();

/* ============================================ recorded [SEQ] results:

                                               <SECTION-UNDER-TEST> took     8973 [us] to run in [SEQ] mode for    2 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took    28611 [us] to run in [SEQ] mode for    4 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took    58824 [us] to run in [SEQ] mode for    8 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   486786 [us] to run in [SEQ] mode for   64 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  1019990 [us] to run in [SEQ] mode for  128 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  2010680 [us] to run in [SEQ] mode for  256 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  4154970 [us] to run in [SEQ] mode for  512 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  8260960 [us] to run in [SEQ] mode for 1024 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took 15853000 [us] to run in [SEQ] mode for 2048 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took 33126800 [us] to run in [SEQ] mode for 4096 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took      n/a [us] to run in [SEQ] mode for 8192 elements on <TiO>-IDE

   ============================================ */

/* ---------------------------------------------SECTION-UNDER-TEST--*/ aStopWATCH_PAR.start();
forall    i in indices {                     // SECTION-UNDER-TEST-- same work as above, rows processed by a forall
  rowsums[i] = + reduce(A[i, ..]);           // SECTION-UNDER-TEST-- overwrites the value stored by the [SEQ] pass
}                                            // SECTION-UNDER-TEST--
/* ---------------------------------------------SECTION-UNDER-TEST--*/ aStopWATCH_PAR.stop();

/* ============================================ recorded [PAR] results:

                                               <SECTION-UNDER-TEST> took  12131 [us] to run in [PAR] mode for    2 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   8095 [us] to run in [PAR] mode for    4 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   8023 [us] to run in [PAR] mode for    8 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   8156 [us] to run in [PAR] mode for   64 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   7990 [us] to run in [PAR] mode for  128 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took   8692 [us] to run in [PAR] mode for  256 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  15134 [us] to run in [PAR] mode for  512 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  16926 [us] to run in [PAR] mode for 1024 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took  30671 [us] to run in [PAR] mode for 2048 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took 105323 [us] to run in [PAR] mode for 4096 elements on <TiO>-IDE
                                               <SECTION-UNDER-TEST> took 292232 [us] to run in [PAR] mode for 8192 elements on <TiO>-IDE

   ============================================ */

// Print the computed row sums followed by both elapsed times in microseconds.
writeln( rowsums,
        "\n <SECTION-UNDER-TEST> took ", aStopWATCH_SEQ.elapsed( Time.TimeUnits.microseconds ), " [us] to run in [SEQ] mode for ", max_idx, " elements on <TiO>-IDE",
        "\n <SECTION-UNDER-TEST> took ", aStopWATCH_PAR.elapsed( Time.TimeUnits.microseconds ), " [us] to run in [PAR] mode for ", max_idx, " elements on <TiO>-IDE"
        );


感谢您为 HPC 开发和改进如此出色的计算工具。

于 2017-08-17T05:54:18.283 回答