cython - 在 cython 中包装 ScaLAPACK 函数

Question

我使用lapack和blas库来实现矩阵求逆和矩阵乘法cython。这是我的代码：

import numpy as np
cimport numpy as np
cimport cython
import ctypes    
cdef extern from "f2pyptr.h": 
     void *f2py_pointer(object) except NULL

import scipy.linalg.lapack 
import scipy.linalg.blas
ctypedef np.float64_t REAL_t

ctypedef void (*dgetri_ptr) (long *N, double *A, long *LDA, long *IPIV, double *WORK, long *LWORK, long *INFO) nogil
cdef dgetri_ptr dgetri = <dgetri_ptr>f2py_pointer(scipy.linalg.lapack.dgetri._cpointer) 

ctypedef void (*dgetrf_ptr) (long *M, long *N, double *A, long *LDA, long *IPIV, long *INFO) nogil
cdef dgetrf_ptr dgetrf = <dgetrf_ptr>f2py_pointer(scipy.linalg.lapack.dgetrf._cpointer) 

cdef void inverse_matrix(REAL_t* A, long n, long info):
     #Computes the inverse of an LU-factored general matrix using sgetri routine.
     #This routine computes the inverse inv(A) of a general matrix A. Before calling this routine, call sgetrf to factorize A
     cdef np.ndarray[long, ndim=1] ipiv   = np.empty(n, dtype=np.int64)
     cdef np.ndarray[double, ndim=1] work = np.empty(n, dtype=np.float64)
     dgetrf(&n, &n, A, &n, &ipiv[0], &info)
     dgetri(&n, A, &n, &ipiv[0], &work[0], &n, &info)


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef REAL_t[::1,:] inverse(REAL_t[::1,:] A):
      if (A.shape[0] !=A.shape[1]):
         raise TypeError("The input array must be a square matrix")
      cdef long N = A.shape[0]
      cdef long info
      A=np.require(A, requirements=['F'])
      inverse_matrix(&A[0,0], N, info)

      if info < 0:
          raise ValueError('illegal value in %d-th argument of internal getrf' % -info)
      elif info > 0:
          raise ValueError("Diagonal number %d is exactly zero. Singular matrix." % -info)
      return A

ctypedef void (*dgetri_ptr) (long *N, double *A, long *LDA, long *IPIV, double *WORK, long *LWORK, long *INFO) nogil
cdef dgetri_ptr dgetri = <dgetri_ptr>f2py_pointer(scipy.linalg.lapack.dgetri._cpointer) 
ctypedef void (*dgemm_ptr) (char *TRANSA, char *TRANSB, long *M, long *N, long *K, double *ALPHA, double *A, long *LDA, double *B, long *LDB, double *BETA, double *C, long *LDC) nogil  
cdef dgemm_ptr dgemm = <dgemm_ptr>f2py_pointer(scipy.linalg.blas.dgemm._cpointer) 
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef REAL_t[::1,:] dot_product(REAL_t[::1,:] a, REAL_t[::1,:] b, char* TRANSA ='N', char* TRANSB='N', REAL_t ALPHA = 1.0, REAL_t BETA  = 0.0):

      cdef REAL_t[::1,:] c_view = np.zeros((a.shape[0], b.shape[1]), dtype=np.float64, order='F') 
      cdef long M, N, K, LDA, LDB, LDC
      cdef char* cN = 'N'
      cdef char* cT = 'T'
      if (a.shape[1] !=b.shape[0]):
         raise TypeError("The numbers of columns in matrix a must be equal to number of rows in matrix b")
      if (TRANSA   == cN):
         M = a.shape[0] #the number  of rows  of the  matrix op( A )  and of the  matrix  C
      elif (TRANSA == cT):
         M = a.shape[1]
      if (TRANSB   == cN):
         N = b.shape[1]
      elif (TRANSB == cT):
         N = b.shape[0]
      K   = a.shape[1]
      LDA = a.shape[0]
      LDB = b.shape[0]
      LDC = a.shape[0]
      dgemm(TRANSA, TRANSB, &M, &N, &K, &ALPHA, &a[0,0], &LDA, &b[0,0], &LDB, &BETA, &c_view[0,0], &LDC)
      return c_view

我想通过拆分计算并在多个处理器上运行它们来提高我的代码速度。如何通过在我的代码中包装以下函数来扩展此代码并使用scalapackcython库：

pdgetri使用 LU 分解计算分布矩阵的逆。
pdgetrf计算通用 M×N 分布矩阵的 LU 分解，使用部分旋转和行交换。
pdtran_转置矩阵。
pdgemm_执行矩阵-矩阵乘法之一。

更新一： 为了能够使用pdgetri或者pdgetrf我应该使用以下方法将它们从他们的fortran脚本和cpdgemm_脚本中包装起来？

scalapack_wrap.f90

subroutine C_PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, IWORK, LIWORK, INFO ) bind(c, NAME='C_PDGETRI')
           use iso_c_binding, only: c_double, c_int
           implicit none

           real(c_double), dimension(N, N), intent(in):: A
           real(c_double), dimension(LWORK), intent(in):: WORK
           integer(c_int), dimension(N), intent(in):: DESCA
           integer(c_int), dimension(N), intent(in)::IPIV
           integer(c_int), dimension(LWORK), intent(in):: IWORK
           integer(c_int), intent(in) :: N, IA, JA, LIWORK, LWORK, INFO
           call PDGETRI( N, A, IA, JA, DESCA, IPIV, WORK, LWORK, IWORK, LIWORK, INFO )
end subroutine C_PDGETRI

subroutine C_PDGETRF(M, N, A, IA, JA, DESCA, IPIV, INFO) bind(c, NAME='C_PDGETRF')
           use iso_c_binding, only: c_double, c_int
           implicit none

           real(c_double), dimension(M, N), intent(in):: A
           integer(c_int), dimension(N), intent(in):: DESCA
           integer(c_int), dimension(N), intent(in)::IPIV
           integer(c_int), intent(in) :: M, N, IA, JA, INFO
           call PDGETRF( M, N, A, IA, JA, DESCA, IPIV, INFO )
end subroutine C_PDGETRF

scalapack_func.h

extern void C_PDGETRI( long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, double* WORK, long* LWORK, int* IWORK, long* LIWORK, int* INFO );
extern void C_PDGETRF( long* M, long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, int* INFO);

scalapack_funcs.pyx

import numpy as np
cimport numpy as np
cimport cython
cdef extern from "scalapack_func.h":
     void C_PDGETRI( long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, double* WORK, long* LWORK, int* IWORK, long* LIWORK, int* INFO);
     void C_PDGETRF( long* M, long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, int* INFO);

cdef double[:,::1] pdgetri_(double[:,::1] A):
     cdef long N      = A.shape[0]
     cdef long LWORK  = A.shape[1]
     cdef long LIWORK = A.shape[1]
     cdef int INFO    = 0
     cdef int IA      = 0 #the row index in the global array A indicating the first row of sub( A )
     cdef int JA      = 0 #The column index in the global array A indicating the first column of sub( A ).
     cdef double[::1] WORK = np.empty(LWORK, dtype=np.float64)
     cdef int[::1] IWORK   = np.empty(LIWORK, dtype=np.int32)
     cdef int[::1] IPIV    = np.empty(N, dtype=np.int32)
     cdef int[::1] DESCA   = np.empty(N, dtype=np.int32)
     C_PDGETRI(&N, &A[0,0],  &IA, &JA, &DESCA[0], &IPIV[0], &WORK[0], &LWORK, &IWORK[0], &LIWORK, &INFO)
     if INFO < 0:
         raise ValueError('illegal value in %d-th argument of internal pdgetri' % -INFO)
     elif INFO > 0:
         raise ValueError("Diagonal number %d is exactly zero. Singular matrix." % -INFO)
     return A 

cdef double[:,::1] pdgetrf_(double[:,::1] A):
     cdef long M      = A.shape[0] #The number of rows to be operated on,
     cdef long N      = A.shape[1] #The number of columns to be operated on
     cdef int INFO    = 0
     cdef int IA      = 0 #the row index in the global array A indicating the first row of sub( A )
     cdef int JA      = 0 #The column index in the global array A indicating the first column of sub( A ).
     cdef int[::1] IPIV    = np.empty(M, dtype=np.int32)
     cdef int[::1] DESCA   = np.empty(M, dtype=np.int32)
     C_PDGETRF(&M, &N, &A[0,0], &IA, &JA, &DESCA[0], &IPIV[0], &INFO)
     if INFO < 0:
         raise ValueError('illegal value in %d-th argument of internal pdgetrf' % -INFO)
     elif INFO > 0:
         raise ValueError("Diagonal number %d is exactly zero. Singular matrix." % -INFO)
     return A   
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef double[:,::1] inverse(double[:,::1] A):     
      if (A.shape[0] !=A.shape[1]):
          raise TypeError("The input array must be a square matrix")
      A=np.require(A, requirements=['F'])
      pdgetrf_(A)
      pdgetri_(A)
      return A

更新二：

安装程序.py

from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext
# This line only needed if building with NumPy in Cython file.
from numpy import get_include
import subprocess
import os
from os import system
import warnings
import mpi4py

fortran_mod_comp = 'mpif90 /usr/local/scalapack-2.0.2/SRC/pdgetri.f -c -o pdgetri_.o -O3 -fPIC'
print fortran_mod_comp
system(fortran_mod_comp)

fortran_mod_comp = 'mpif90 /usr/local/scalapack-2.0.2/SRC/pdgetrf.f -c -o pdgetrf_.o -O3 -fPIC'
print fortran_mod_comp
system(fortran_mod_comp)

shared_obj_comp = 'mpif90 scalapack_wrap.f90 -c -o scalapack_wrap.o -O3 -fPIC'
print shared_obj_comp
system(shared_obj_comp)
def runcommand(cmd):
    process = subprocess.Popen(cmd.split(), shell=False, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, universal_newlines=True)
    c = process.communicate()
    if process.returncode != 0:
        raise Exception("Something went wrong whilst running the command: %s" % cmd)
    return c[0]

def whichscalapack():
    # Figure out which Scalapack to use
    if 'MKLROOT' in os.environ:
        return 'intelmkl'
    else:
        return 'netlib'

def whichmpi():
    # Figure out which MPI environment this is
    import re
    try:
        mpiv = runcommand('mpirun -V')
        if re.search('Intel', mpiv):
            return 'intelmpi'
        elif re.search('Open MPI', mpiv):
            return 'openmpi'
    except:
        return 'mpich'
    warnings.warn('Unknown MPI environment.')
    return None

scalapackversion = whichscalapack()
mpiversion = whichmpi()
if mpiversion == 'openmpi':
    # Fetch the arguments for linking and compiling.
    mpilinkargs = runcommand('mpicc -showme:link').split()
    mpicompileargs = runcommand('mpicc -showme:compile').split()

if scalapackversion == 'intelmkl':
    # Set library includes (taking into account which MPI library we are using)."
    scl_lib = ['mkl_scalapack_lp64', 'mkl_rt', 'mkl_blacs_'+mpiversion+'_lp64', 'iomp5', 'pthread']
    scl_libdir = [os.environ['MKLROOT']+'/lib/intel64' if 'MKLROOT' in os.environ else '']
elif scalapackversion == 'netlib':
    scl_lib = ['scalapack', 'gfortran']
    scl_libdir = [ os.path.dirname(runcommand('gfortran -print-file-name=libgfortran.a')) ]
else:
    raise Exception("Scalapack distribution unsupported. Please modify setup.py manually.")


ext_modules = [Extension(# module name:
                         'scalapack_funcs',
                         # source file:
                         sources=['scalapack_funcs.pyx'],
                         # Needed if building with NumPy. This includes the NumPy headers when compiling.
                         include_dirs = [get_include(), mpi4py.get_include()],
                         #include libraries 
                         library_dirs=scl_libdir, libraries=scl_lib,
                         #libraries=["scalapack"],
                         #library_dirs=[library_dirs],
                         # other compile args for gcc
                         extra_compile_args=[ "-fopenmp", "-lmkl_lapack95_lp64", "-lmkl_core", "-lmkl_intel_lp64",  "-lmkl_rt", "-lmkl_intel_thread" ] + mpicompileargs,
                         # other files to link to
                         extra_link_args=['pdgetrf_.o', 'pdgetri_.o', 'scalapack_wrap.o','-fopenmp', "-lmkl_lapack95_lp64", "-lmkl_core", "-lmkl_intel_lp64", "-lmkl_rt", "-lmkl_intel_thread"] + mpilinkargs)]

setup(
      cmdclass = {'build_ext': build_ext},
      ext_modules = ext_modules
      )

代码被编译但是

>>> import scalapack_funcs
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ImportError: /opt/intel/mkl/lib/intel64/libmkl_scalapack_lp64.so: undefined symbol: dapply_2hv

正如danny指出的那样，问题可能只是使用scalapack库正确编译代码。但我仍然收到上述错误消息。任何建议如何摆脱它？

score 1 · Accepted Answer

扩展需要链接到它正在使用的库。undefined symbol表示头文件中包含的库未链接到共享对象，并且找不到该库的符号。

大概ilcm来自scalapack，不确定库名称是什么。将libraries设置添加到所需Extension的所有库中。

Extension('scalapack_funcs',
          sources=['scalapack_funcs.pyx'],
          libraries=['scalapack'],
          extra_compile_args=['-fPIC', '-O3'],
          extra_link_args=['pdgetri_.o', 'pdgetrf_.o', 'scalapack_wrap.o'])]

score 1 · Accepted Answer

看来你几乎是对的。添加要从中导入函数的特定头文件，并使用 FORTRAN 函数的正确名称C_PDGETRI。编译时，请务必添加正确的引用"your_fortran_header.h"以及相应的先前编译的 FORTRAN 静态库。

cdef extern from "your_fortran_header.h":
     void C_PDGETRI( long* N, double* A, int* IA, int* JA, int* DESCA, int* IPIV, double* WORK, long* LWORK, int* IWORK, int* LIWORK, int* INFO )

cdef double[:,::1] pdgetri_(double[:,::1] A not None):
     cdef long N = A.shape[0]
     cdef long LWORK = A.shape[1]
     cdef long LIWORK = A.shape[1]
     cdef int INFO = 0
     cdef long IA = 0 #the row index in the global array A indicating the first row of sub( A )
     cdef long JA = 0 #The column index in the global array A indicating the first column of sub( A ).
     cdef double[::1] WORK = np.empty(LWORK, dtype=np.float64)
     cdef int[::1] IWORK = np.empty(LIWORK, dtype=np.int32)
     cdef int[::1] IPIV = np.empty(N, dtype=np.int32)
     cdef int[::1] DESCA = np.empty(N, dtype=np.int32)
     C_PDGETRI(&N, &A[0,0], &IA, &JA, &DESCA[0], &IPIV[0], &WORK[0], &LWORK, &IWORK[0], &LIWORK, &INFO)
     return A

cython - 在 cython 中包装 ScaLAPACK 函数

2 回答 2

Related

Reference