cython - Cython 编译器与数组崩溃——为什么？

Question

我正在尝试使用 memoryview 将以前的 python 代码转换为 cython，并且我不断从第一 bolo 行（靠近底部）得到编译器崩溃：

from __future__ import print_function
from builtins import range
from builtins import object
import cython
from cython.view cimport array as cvarray
from cpython cimport bool
from libc.math cimport round
from libc.stdlib cimport malloc, free
import numpy as np
cimport numpy as np

class LegCache(object):
def __init__(self):
    self.d = {}
    pass

def prep_legendre(self, n, polyorder):
    p = (n, polyorder)
    if p not in self.d:
        self.d[p] = prep_legendre(n, polyorder)
    return self.d[p]

@cython.boundscheck(False)
@cython.wraparound(False)
cdef prep_legendre(int n, int polyorder):
'''make array of legendre's'''
assert type(n) == int and type(polyorder) == int

cdef int[:,:] legendres = np.empty([n, polyorder + 1], dtype=int)
cdef int l = 0

legendres[:, 0] = np.ones(n)
if polyorder > 0:
    legendres[:, 1] = np.linspace(-1, 1, n)
for i in range(polyorder - 1):
    l = i + 1
    np.multiply(l/(l+1), legendres[:,l-1])

cdef double[:,:] q = np.empty([polyorder + 1, polyorder + 1], dtype=double)
cdef double[:,:] r = np.empty([n, polyorder + 1], dtype=double)
cdef double[:,:] qt = np.empty([polyorder+1, polyorder+1], dtype=double)   
cdef double[:,:] rinv = np.empty([polyorder + 1, n], dtype=double)

q, r = np.linalg.qr(legendres)
rinv = np.linalg.inv(r)
qt = q.T.copy()
return legendres, rinv, qt

def filter_slice_legendre_qr_mask_precalc(bolo,mask,legendres):
    m=legendres.shape[1]
    n=legendres.shape[0]
    l2 = legendres*np.tile(mask.reshape(n,1),[1,m])
    q,r=np.linalg.qr(l2)

rinv = np.linalg.inv(r)
p = np.dot(q.T,bolo)
coeff=np.dot(rinv,p)
out=bolo-np.dot(legendres,coeff)
return out,coeff

@cython.boundscheck(False)
@cython.wraparound(False)
cdef poly_filter_array(
    double[:,:] array,
    np.ndarray[DTYPE3_t, cast=True, ndim=2] mask_remove, # I think this casting should still work like this
    np.ndarray[DTYPE3_t, cast=True, ndim=2] mask,
    int[:] scan_list,
    int ibegin,
    int polyorder,
    double minfrac=.75):
""" writes over input array
"""
cdef double nold = -1
# do nothing
if polyorder < 0:
    return array
#damn, work
cdef int nch = array.shape[0]
cdef int nt = array.shape[1]
cdef int ns = len(scan_list)

cdef double[:,:,:] coeff_out = np.empty([nch, ns, nt], dtype = double)

legcache = LegCache()

cdef int istart = 0
cdef int n = 0
cdef int start = 0
cdef double mean = 0.0

cdef int[:,:] legendres = np.empty([n, polyorder + 1], dtype=int)
cdef double[:,:] qt = np.empty([polyorder+1, polyorder+1], dtype=double)   
cdef double[:,:] rinv = np.empty([polyorder + 1, n], dtype=double)

#cdef double[:,:] bolo # I think you can get away without giving it a value. bolo changes in size throughout the loop
cdef int[:] goodhits = np.empty(np.shape(mask)[1], dtype = int)
# I'm not sure about the size of these
cdef double[:,:] coeff = np.empty([]) # I can't remember how dot product work right now but this should be easy to sort out

# remove mean
if polyorder == 0:
    for s in range(len(scan_list)):
        istart, n = scan_list[s]
        start = istart - ibegin
        for i in range(nch):
            if np.any(mask[i, start:start + n]):
                mean = np.average(
                    array[i, start:start + n], weights=mask[i, start:start + n])
                array[i, start:start + n] -= mean
                coeff_out[i, s, 0] = mean

# other cases
if polyorder > 0:
    for s in range(len(scan_list)):
        istart, n = scan_list[s]
        start = istart - ibegin
        if n <= polyorder:  # otherwise cannot compute legendre polynomials
            for i in range(nch):
                mask[i, start:start + n] = 0  # flag it
                # remove this region from actual data as well
                mask_remove[i, start:start + n] = 0
                print('Not enough points (%d) to build legendre of order (%d)' % (n, polyorder))
            continue
        goodhits = np.sum(mask[:, start:start + n], axis=1)
        if n != nold:
            legendres, rinv, qt = legcache.prep_legendre(n, polyorder)
            rinvqt = np.dot(rinv, qt)
            nold = n
        # handle no masked ones

        for i in range(nch):
            if goodhits[i] != n:
                continue  # skip for now

            bolo[i, :] = array[i, start:start + n] #where problem starts
            coeff = np.dot(rinvqt, bolo)
            coeff_out[i, s, :] = coeff
            bolo -= np.dot(legendres, coeff)


        for i in range(nch):
            if goodhits[i] == n:
                continue  # skip since dealt with above
            if goodhits[i] < minfrac * n:  # not enough points
                mask[i, start:start + n] = 0  # flag it
                # remove this region from actual data as well
                mask_remove[i, start:start + n] = 0
                continue
            bolo, coeff = filter_slice_legendre_qr_mask_precalc(
                array[i, start:start + n], mask[i, start:start + n], legendres)
            array[i, start:start + n] = bolo
            coeff_out[i, s, :] = coeff
return coeff_out

当我尝试编译代码时，它会引发非特定错误“ExpandInplaceOperators 中的编译器崩溃”。我完全迷路了。

score 2 · Accepted Answer

生成错误的最少代码不需要运行。它只需要足以生成错误消息。我一直在删除代码，直到留下会生成错误消息的最小块，就是这样

cdef poly_filter_array(
    double[:,:] array):

    cdef double mean = 0.0

    cdef int i =0

    array[i, :] -= mean

编译此代码给出

Compiler crash in ExpandInplaceOperators

ModuleNode.body = StatListNode(cycrashdelme.pyx:7:5)
StatListNode.stats[0] = CFuncDefNode(cycrashdelme.pyx:7:5,
    args = [...]/1,
    modifiers = [...]/0,
    visibility = 'private')
CFuncDefNode.body = StatListNode(cycrashdelme.pyx:10:4)
StatListNode.stats[2] = InPlaceAssignmentNode(cycrashdelme.pyx:14:9,
    operator = '-')
File 'UtilNodes.py', line 146, in __init__: ResultRefNode(may_hold_none = True,
    result_is_used = True,
    use_managed_ref = True)

Compiler crash traceback from this point on:
  File "<some path on my computer>\lib\site-packages\Cython\Compiler\UtilNodes.py", line 146, in __init__
    assert self.pos is not None
AssertionError:

像这样的事情本来是一个更好的例子来问这个问题......

这几乎可以告诉您问题出在线路上array[i,:] -= mean（在您的原始版本中，我认为是array[:,start:start+n]）。

值得尝试一些简化版本来看看会发生什么：

array -= mean
array[i,:] = array[i,:] - mean

给

Invalid operand types for '-' (double[:, :]; double)
Invalid operand types for '-' (double[:]; double)

分别为两条线。所以我们知道问题在于 Cython 内存视图不支持整个视图的算术运算（尽管您可以对单个元素执行此操作）。

您可以通过暂时将其转换为 numpy 数组来进行算术运算

tmp_as_array = np.asarray(array[i,:])
tmp_as_array -= mean

（打字没有速度优势tmp_as_array）。这会修改现有数据，而不是复制它，您可以通过打印tmp_as_array.owndatawhich should be来验证False。

综上所述 -

你不能在 Cython memoryviews 上做算术（它们只是提供数组存储，但不是为了提供数学而设计的）。
您收到的错误消息非常无用，可能是一个错误，应该报告给https://github.com/cython/cython/issues。
复制您的代码，然后开始删除位，直到您确定实际导致错误的原因

cython - Cython 编译器与数组崩溃——为什么？

1 回答 1

Related

Reference