1

通过 MEX 运行的 C 文件使用 MKL Lapack 时,我遇到了由 LAPACKE_dgertrf 生成的 IPIV 具有无效索引的问题。LAPACKE_dgertrf 的文档指出 IPIV 的值应该在 1 和矩阵大小之间,而不是在其中一个值中得到 0。起初我以为这是为 FORTRAN 编写的文档,而 0 只是指第一行;但是,IPIV 还包含一个与矩阵大小相等的元素。

这是一个较大代码的问题,我可以在较小的测试用例中复制该代码:

C 代码(test_case.c):

#include "mex.h"
#include <mkl.h>
#include <math.h>
#include <stdbool.h>
#include <string.h>
#include <matrix.h>

void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray * prhs[]) {


   int N = (int) mxGetScalar(prhs[0]);
   int M = (int) mxGetScalar(prhs[1]);

   double * A = (double*) malloc(sizeof(double) * N * N);
   memcpy(A, mxGetPr(prhs[2]), sizeof(double) * N * N);

   double * B = (double*) malloc(sizeof(double) * N * M);
   memcpy(B, mxGetPr(prhs[3]),sizeof(double) * N * M);

   mexPrintf("\nA:\n");
   for (int i = 0; i < N; i++) {
     for (int j = 0; j < N; j++) {
       mexPrintf("%f ", A[j * N + i]);
     }
     mexPrintf("\n");
   }

   mexPrintf("\nB:\n");
   for (int i = 0; i < N; i++) {
     for (int j = 0; j < M; j++) {
       mexPrintf("%f ", B[j * N + i]);
     }
     mexPrintf("\n");
   }

   lapack_int * pivots = (lapack_int*)malloc(sizeof(lapack_int)*N);

   int info = LAPACKE_dgetrf(CblasColMajor,N,N,A,N,pivots); //Turn HPH   into is LU decomp
   if (info != 0) {
     fprintf(stderr,"info = %d\n",info);
     mexErrMsgTxt("ERROR:smooth_history_simple-LAPACKE_dgetrf failed");
   }

   mexPrintf("\nLU_A\n");
   for (int i = 0; i < N; i++) {
     for (int j = 0; j < N; j++) {
       mexPrintf("%f ", A[i + j * N]);
     }
     mexPrintf("\n");
   }

   mexPrintf("ipiv:\n");
   for (int i = 0; i < N; i++) {
     mexPrintf("%d ", pivots[i]);
   }

   LAPACKE_dgetrs(CblasColMajor,CblasTrans,N,M,A,N,pivots,B,M);

   mexPrintf("Solution:\n");
   for (int i = 0; i < N ; i++) {
     for (int j = 0; j < M; j++) {
       mexPrintf("%f ", B[i + N * j]);
     }
     mexPrintf("\n");
   }
}

要编译的 mex 命令是

mex CC="icc" CFLAGS="-D_GNU_SOURCE  -fexceptions -fPIC -fno-omit-frame-pointer -pthread -std=c99 -mkl" -v -largeArrayDims -I/opt/intel/mkl/include -L/opt/intel/mkl/lib/intel64 -lmkl_core -lmkl_intel_ilp64 -lmkl_intel_thread -lpthread -lm -liomp5 test_case.c

当我尝试在 MATLAB 中运行 MEX 文件时,我得到

    >> A = [0,5,5;2,9,0;6,8,8]

    A =

     0     5     5
     2     9     0
     6     8     8

    >> B = [1,0,0;0,1,0;0,0,1]


    B =

     1     0     0
     0     1     0
     0     0     1

>> test_case(3,3,A,B)
test_case(3,3,A,B)

A:
0.000000 5.000000 5.000000 
2.000000 9.000000 0.000000 
6.000000 8.000000 8.000000 

B:
1.000000 0.000000 0.000000 
0.000000 1.000000 0.000000 
0.000000 0.000000 1.000000 

LU_A
6.000000 8.000000 8.000000 
0.333333 6.333333 -2.666667 
0.000000 0.789474 7.105263 
ipiv:
3 0 2 
------------------------------------------------------------------------
       Segmentation violation detected at Mon Jul  6 14:48:38 2015
------------------------------------------------------------------------

Configuration:
  Crash Decoding     : Disabled
  Current Visual     : 0x25 (class 4, depth 24)
  Default Encoding   : US-ASCII
  GNU C Library      : 2.5 stable
  MATLAB Architecture: glnxa64
  MATLAB Root        : /opt/MATLAB/R2013a
  MATLAB Version     : 8.1.0.604 (R2013a)
  Operating System   : Linux 2.6.18-404.el5 #1 SMP Sat Mar 7 04:14:13 EST 2015 x86_64
  Processor ID       : x86 Family 6 Model 45 Stepping 2, GenuineIntel
  Virtual Machine    : Java 1.6.0_17-b04 with Sun Microsystems Inc. Java HotSpot(TM) 64-Bit Server VM mixed mode
  Window System      : Hummingbird - Open Text (13850), display localhost:35.0

Fault Count: 1


Abnormal termination:
Segmentation violation

Register State (from fault):
  RAX = 0000000000000001  RBX = 0000000000000066
  RCX = 00015858106d7848  RDX = 0000000000000003
  RSP = 00002b0b75e504c8  RBP = 0000000000000070
  RSI = 0000000000000003  RDI = 0000000000000000

   R8 = 00002b0b00000003   R9 = 0000000000000003
  R10 = 00002b0b94f288e0  R11 = 0000000000000000
  R12 = 0000000000000003  R13 = 0000000000000003
  R14 = 00000000106d77e0  R15 = 0000000000000003

  RIP = 00002b0b9c2f1aa3  EFL = 0000000000010206

   CS = 0033   FS = 0000   GS = 0000

Stack Trace (from fault):
[  0] 0x00002b0b9c2f1aa3 /opt/intel/composer_xe_2013.2.146/mkl/lib/intel64/libmkl_intel_ilp64.so+03218083 LAPACKE_dge_nancheck+00000035
[  1] 0x00002b0b9c1eccdd /opt/intel/composer_xe_2013.2.146/mkl/lib/intel64/libmkl_intel_ilp64.so+02149597 LAPACKE_dgetrs+00000141
[  2] 0x00002b0b9ab62c4e /lcl/mq/home/m1tjm01/projects/Hess/Kalman_Filter/test_case.mexa64+00007246 mexFunction+00000670
[  3] 0x00002b0b6cdeff8a           /opt/MATLAB/R2013a/bin/glnxa64/libmex.so+00110474 mexRunMexFile+00000090
[  4] 0x00002b0b6cdec0f9           /opt/MATLAB/R2013a/bin/glnxa64/libmex.so+00094457
[  5] 0x00002b0b6cdecf1c           /opt/MATLAB/R2013a/bin/glnxa64/libmex.so+00098076
[  6] 0x00002b0b646e36b2 /opt/MATLAB/R2013a/bin/glnxa64/libmwm_dispatcher.so+00562866 _ZN8Mfh_file11dispatch_fhEiPP11mxArray_tagiS2_+00000594
[  7] 0x00002b0b64b33256 /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+02245206
[  8] 0x00002b0b64abf09d /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+01769629
[  9] 0x00002b0b64ae7b0e /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+01936142
[ 10] 0x00002b0b64ae4993 /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+01923475
[ 11] 0x00002b0b64ae5797 /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+01927063
[ 12] 0x00002b0b64b50e50 /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+02367056
[ 13] 0x00002b0b646e36b2 /opt/MATLAB/R2013a/bin/glnxa64/libmwm_dispatcher.so+00562866 _ZN8Mfh_file11dispatch_fhEiPP11mxArray_tagiS2_+00000594
[ 14] 0x00002b0b64b1fdcb /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+02166219
[ 15] 0x00002b0b64add7cc /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+01894348
[ 16] 0x00002b0b64ad9e1d /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+01879581
[ 17] 0x00002b0b64ada255 /opt/MATLAB/R2013a/bin/glnxa64/libmwm_interpreter.so+01880661
[ 18] 0x00002b0b6cbbdfae      /opt/MATLAB/R2013a/bin/glnxa64/libmwbridge.so+00139182
[ 19] 0x00002b0b6cbbe111      /opt/MATLAB/R2013a/bin/glnxa64/libmwbridge.so+00139537
[ 20] 0x00002b0b6cbbece5      /opt/MATLAB/R2013a/bin/glnxa64/libmwbridge.so+00142565 _Z8mnParserv+00000725
[ 21] 0x00002b0b644183d2         /opt/MATLAB/R2013a/bin/glnxa64/libmwmcr.so+00447442 _ZN11mcrInstance30mnParser_on_interpreter_threadEv+00000034
[ 22] 0x00002b0b643f79ac         /opt/MATLAB/R2013a/bin/glnxa64/libmwmcr.so+00313772
[ 23] 0x00002b0b643f7b88         /opt/MATLAB/R2013a/bin/glnxa64/libmwmcr.so+00314248
[ 24] 0x00002b0b6f5f15c6         /opt/MATLAB/R2013a/bin/glnxa64/libmwuix.so+00480710
[ 25] 0x00002b0b6f5fedf2         /opt/MATLAB/R2013a/bin/glnxa64/libmwuix.so+00536050
[ 26] 0x00002b0b63d68862    /opt/MATLAB/R2013a/bin/glnxa64/libmwservices.so+01845346
[ 27] 0x00002b0b63d6950f    /opt/MATLAB/R2013a/bin/glnxa64/libmwservices.so+01848591 _Z25svWS_ProcessPendingEventsiib+00001615
[ 28] 0x00002b0b643f85ef         /opt/MATLAB/R2013a/bin/glnxa64/libmwmcr.so+00316911
[ 29] 0x00002b0b643f8f5c         /opt/MATLAB/R2013a/bin/glnxa64/libmwmcr.so+00319324
[ 30] 0x00002b0b643f2592         /opt/MATLAB/R2013a/bin/glnxa64/libmwmcr.so+00292242
[ 31] 0x000000347dc0683d                             /lib64/libpthread.so.0+00026685
[ 32] 0x000000347d0d4fcd                                   /lib64/libc.so.6+00872397 clone+00000109


This error was detected while a MEX-file was running. If the MEX-file
is not an official MathWorks function, please examine its source code
for errors. Please consult the External Interfaces Guide for information
on debugging MEX-files.

If this problem is reproducible, please submit a Service Request via:
    http://www.mathworks.com/support/contact_us/

A technical support engineer might contact you with further information.

Thank you for your help.** This crash report has been saved to disk as /mq/home/m1tjm01/matlab_crash_dump.21353-1 **



MATLAB is exiting because of fatal error

由此,我观察到当打印行数据透视表时,它同时包含 0 和 3。由于矩阵是 3 x 3,因此这两个都不是有效索引。文档说 0 条目不应该在那里。我强烈怀疑这会导致 dgetrs 中的段错误。

有谁知道这里发生了什么?

4

0 回答 0