2

我有一个 cython 代码,我尝试使用 prange cython 命令对其进行并行化。代码可以编译,但是当我运行它时,它只在单个线程/核心上运行。我在其他帖子中读到,大多数情况下这是由于 gil 没有正确发布,但是当我查看我的代码时,我看不到这种情况发生在哪里。你知道我的代码有什么问题吗?

更新:

  • 编译器:gcc 7.5
  • 赛通:0.29.21
  • 操作系统:ubuntu 20.04

赛通代码:

import cython
from cython.parallel import prange
cimport numpy as cnp 

import numpy as np

cdef extern from "math.h" nogil:

    double floor(double x)
    double ceil(double x)
    double sqrt(double x)

cdef inline double round(double r) nogil:
    return floor(r + 0.5) if (r > 0.0) else ceil(r - 0.5)

@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef int atoms_in_shell_inner(double[:,:] coords, int[:,:] indexes, double[:,:] cell, double[:,:] rcell, double[:] boxed_center, double r2, int mol_idx) nogil:

    cdef double r, rx, ry, rz, sdx, sdy, sdz, x, y, z, x_boxed, y_boxed, z_boxed

    cdef int at_idx, i

    # Loop over the selected atoms j of molecule i
    for 0 <= i < indexes.shape[0]:
    
        if indexes[mol_idx,i] == -1:
            return 0

        at_idx = indexes[mol_idx,i]

        x = coords[at_idx,0]
        y = coords[at_idx,1]
        z = coords[at_idx,2]

        # Convert real coordinates to box coordinates
        x_boxed = x*rcell[0,0] + y*rcell[0,1] + z*rcell[0,2]
        y_boxed = x*rcell[1,0] + y*rcell[1,1] + z*rcell[1,2]
        z_boxed = x*rcell[2,0] + y*rcell[2,1] + z*rcell[2,2]

        sdx = x_boxed - boxed_center[0]
        sdy = y_boxed - boxed_center[1]
        sdz = z_boxed - boxed_center[2]

        # Apply the PBC to the box coordinates distance vector between atom j and the center of the shell
        sdx -= round(sdx)
        sdy -= round(sdy)
        sdz -= round(sdz)

        # Convert back the box coordinates distance vector to real coordinates distance vector
        rx = sdx*cell[0,0] + sdy*cell[0,1] + sdz*cell[0,2]
        ry = sdx*cell[1,0] + sdy*cell[1,1] + sdz*cell[1,2]
        rz = sdx*cell[2,0] + sdy*cell[2,1] + sdz*cell[2,2]

        # Compute the squared norm of the distance vector in real coordinates
        r = rx*rx + ry*ry + rz*rz

        # If the distance is below the cutoff mark the molecule i as being in the shell
        if r < r2:
            return 1

    return 0

@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
def atoms_in_shell(double[:,:] coords,
                   double[:,:] cell,
                   double[:,:] rcell,
                   int[:,:] indexes,
                   cnp.int32_t center,
                   cnp.float64_t radius):

    cdef int i, n_molecules

    cdef double[:] shell_center = coords[center,:]
    cdef double[:] boxed_center = np.empty(3,dtype=np.float)
    cdef int[:] in_shell = np.zeros(indexes.shape[0],dtype=np.int32)
    
    n_molecules = indexes.shape[0]

    boxed_center[0] = shell_center[0]*rcell[0,0] + shell_center[1]*rcell[0,1] + shell_center[2]*rcell[0,2]
    boxed_center[1] = shell_center[0]*rcell[1,0] + shell_center[1]*rcell[1,1] + shell_center[2]*rcell[1,2]
    boxed_center[2] = shell_center[0]*rcell[2,0] + shell_center[1]*rcell[2,1] + shell_center[2]*rcell[2,2]
        
    # Loop over the molecules
    for i in prange(n_molecules,nogil=True):
            
        in_shell[i] = atoms_in_shell_inner(coords, indexes, cell, rcell, boxed_center, radius*radius, i)

    return in_shell.base

setup.py 文件:

from Cython.Distutils import build_ext

from distutils.core import setup, Extension

import numpy

INCLUDE_DIR = [numpy.get_include()]

EXTENSIONS = [Extension('atoms_in_shell',
                        include_dirs=INCLUDE_DIR,
                        sources=["atoms_in_shell.pyx"],
                        extra_compile_args = ["-O3", "-ffast-math", "-march=native", "-fopenmp" ],
                        extra_link_args=['-fopenmp']),
                        ]

setup(ext_modules=EXTENSIONS, cmdclass={'build_ext': build_ext})

Python代码:

from atoms_in_shell import atoms_in_shell

import numpy as np

coords = np.random.uniform(-1000.0,1000.0,(500000000,3))

cell = np.identity(3)

rcell = np.identity(3)

indexes = np.empty((10000,6),dtype=np.int32)
indexes.fill(-1)

for row in indexes:
    n_atoms = np.random.randint(1,6)
    row[:n_atoms] = np.random.choice(coords.shape[0]-1,n_atoms)

print(atoms_in_shell(coords, cell, rcell, indexes, 5, 1))
4

0 回答 0