我有一个 cython 代码,我尝试使用 prange cython 命令对其进行并行化。代码可以编译,但是当我运行它时,它只在单个线程/核心上运行。我在其他帖子中读到,大多数情况下这是由于 gil 没有正确发布,但是当我查看我的代码时,我看不到这种情况发生在哪里。你知道我的代码有什么问题吗?
更新:
- 编译器:gcc 7.5
- 赛通:0.29.21
- 操作系统:ubuntu 20.04
赛通代码:
import cython
from cython.parallel import prange
cimport numpy as cnp
import numpy as np
cdef extern from "math.h" nogil:
double floor(double x)
double ceil(double x)
double sqrt(double x)
cdef inline double round(double r) nogil:
return floor(r + 0.5) if (r > 0.0) else ceil(r - 0.5)
@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef int atoms_in_shell_inner(double[:,:] coords, int[:,:] indexes, double[:,:] cell, double[:,:] rcell, double[:] boxed_center, double r2, int mol_idx) nogil:
cdef double r, rx, ry, rz, sdx, sdy, sdz, x, y, z, x_boxed, y_boxed, z_boxed
cdef int at_idx, i
# Loop over the selected atoms j of molecule i
for 0 <= i < indexes.shape[0]:
if indexes[mol_idx,i] == -1:
return 0
at_idx = indexes[mol_idx,i]
x = coords[at_idx,0]
y = coords[at_idx,1]
z = coords[at_idx,2]
# Convert real coordinates to box coordinates
x_boxed = x*rcell[0,0] + y*rcell[0,1] + z*rcell[0,2]
y_boxed = x*rcell[1,0] + y*rcell[1,1] + z*rcell[1,2]
z_boxed = x*rcell[2,0] + y*rcell[2,1] + z*rcell[2,2]
sdx = x_boxed - boxed_center[0]
sdy = y_boxed - boxed_center[1]
sdz = z_boxed - boxed_center[2]
# Apply the PBC to the box coordinates distance vector between atom j and the center of the shell
sdx -= round(sdx)
sdy -= round(sdy)
sdz -= round(sdz)
# Convert back the box coordinates distance vector to real coordinates distance vector
rx = sdx*cell[0,0] + sdy*cell[0,1] + sdz*cell[0,2]
ry = sdx*cell[1,0] + sdy*cell[1,1] + sdz*cell[1,2]
rz = sdx*cell[2,0] + sdy*cell[2,1] + sdz*cell[2,2]
# Compute the squared norm of the distance vector in real coordinates
r = rx*rx + ry*ry + rz*rz
# If the distance is below the cutoff mark the molecule i as being in the shell
if r < r2:
return 1
return 0
@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
def atoms_in_shell(double[:,:] coords,
double[:,:] cell,
double[:,:] rcell,
int[:,:] indexes,
cnp.int32_t center,
cnp.float64_t radius):
cdef int i, n_molecules
cdef double[:] shell_center = coords[center,:]
cdef double[:] boxed_center = np.empty(3,dtype=np.float)
cdef int[:] in_shell = np.zeros(indexes.shape[0],dtype=np.int32)
n_molecules = indexes.shape[0]
boxed_center[0] = shell_center[0]*rcell[0,0] + shell_center[1]*rcell[0,1] + shell_center[2]*rcell[0,2]
boxed_center[1] = shell_center[0]*rcell[1,0] + shell_center[1]*rcell[1,1] + shell_center[2]*rcell[1,2]
boxed_center[2] = shell_center[0]*rcell[2,0] + shell_center[1]*rcell[2,1] + shell_center[2]*rcell[2,2]
# Loop over the molecules
for i in prange(n_molecules,nogil=True):
in_shell[i] = atoms_in_shell_inner(coords, indexes, cell, rcell, boxed_center, radius*radius, i)
return in_shell.base
setup.py 文件:
from Cython.Distutils import build_ext
from distutils.core import setup, Extension
import numpy
INCLUDE_DIR = [numpy.get_include()]
EXTENSIONS = [Extension('atoms_in_shell',
include_dirs=INCLUDE_DIR,
sources=["atoms_in_shell.pyx"],
extra_compile_args = ["-O3", "-ffast-math", "-march=native", "-fopenmp" ],
extra_link_args=['-fopenmp']),
]
setup(ext_modules=EXTENSIONS, cmdclass={'build_ext': build_ext})
Python代码:
from atoms_in_shell import atoms_in_shell
import numpy as np
coords = np.random.uniform(-1000.0,1000.0,(500000000,3))
cell = np.identity(3)
rcell = np.identity(3)
indexes = np.empty((10000,6),dtype=np.int32)
indexes.fill(-1)
for row in indexes:
n_atoms = np.random.randint(1,6)
row[:n_atoms] = np.random.choice(coords.shape[0]-1,n_atoms)
print(atoms_in_shell(coords, cell, rcell, indexes, 5, 1))