这只是对 @Han-Kwang Nienhuys 的回答的一个简短补充。这里主要的改进是避免使用向量化操作,因为它们会带来相当大的性能损失。
通常,如果使用默认的 C 顺序(行优先)数组,最好把输入和输出数组的形状改为 (n, 3),而不是 (3, n)。
import numpy as np
import numba as nb
from scipy.ndimage import map_coordinates
# f, g, h: example input arrays (their definitions are not shown here)
# x1, y1, z1: example sample coordinates (their definitions are not shown here)
# Stack the inputs along a new first axis and store the result in Fortran
# (column-major) order, i.e. with the first axis varying fastest in memory.
# Assumes f, g and h are equally shaped arrays -- TODO confirm.
fgh = np.asfortranarray(np.array([f, g, h]))
#from Han-Kwang Nienhuys
def mymap(ars, coords):
    """Trilinearly interpolate several 3-D arrays at the same sample points.

    Parameters
    ----------
    ars : ndarray, shape (m, nx, ny, nz)
        The ``m`` input arrays stacked along the first axis.
    coords : ndarray of float, shape (3, n)
        Fractional (i, j, k) index coordinates of the ``n`` sample points.
        Each coordinate ``c`` is truncated to ``int16`` to get the lower
        cell corner, and the corner one index higher is read as well.  No
        bounds handling is done, so callers are expected to pass
        ``0 <= c < size - 1`` along each axis (and indices that fit int16).

    Returns
    -------
    ndarray of float64, shape (m, n)
        Interpolated value of every input array at every sample point.
    """
    # NOTE(review): the timings quoted elsewhere in this file suggest this
    # function was benchmarked as a JIT-compiled kernel; no decorator is
    # visible here -- confirm.

    # Work on the transposed coordinates so that each point is one row (n, 3).
    ijk = coords.T.astype(np.int16)  # lower corner of the enclosing cell
    fijk = (coords.T - ijk).astype(np.float32)  # fractional offset in the cell
    n = ijk.shape[0]
    m = ars.shape[0]
    out = np.empty((n, m), dtype=np.float64)

    for pt in range(n):
        i0, j0, k0 = ijk[pt, :3]
        # Note: don't write i1, j1, k1 = ijk[pt, :3]+1 -- much slower.
        i1, j1, k1 = i0 + 1, j0 + 1, k0 + 1
        # Weight of the upper corner along each axis; the lower corner gets
        # the complement.
        fi1, fj1, fk1 = fijk[pt, :3]
        fi0, fj0, fk0 = 1 - fi1, 1 - fj1, 1 - fk1
        # Weighted sum over the 8 corners of the cell, for all m arrays at once.
        out[pt, :] = (
            fi0 * fj0 * fk0 * ars[:, i0, j0, k0]
            + fi0 * fj0 * fk1 * ars[:, i0, j0, k1]
            + fi0 * fj1 * fk0 * ars[:, i0, j1, k0]
            + fi0 * fj1 * fk1 * ars[:, i0, j1, k1]
            + fi1 * fj0 * fk0 * ars[:, i1, j0, k0]
            + fi1 * fj0 * fk1 * ars[:, i1, j0, k1]
            + fi1 * fj1 * fk0 * ars[:, i1, j1, k0]
            + fi1 * fj1 * fk1 * ars[:, i1, j1, k1]
        )
    # Transpose back so the result is indexed (array, point).
    return out.T
#optimized version
@nb.njit(parallel=True)  # nb.prange only parallelises inside a parallel=True jit
def mymap_opt(ars, coords):
    """Trilinearly interpolate several 3-D arrays using explicit scalar loops.

    Unlike a version that slices ``ars[:, i, j, k]``, every element is read
    and written individually, and the loop over points uses ``nb.prange``.

    Parameters
    ----------
    ars : ndarray, shape (m, nx, ny, nz)
        The ``m`` input arrays stacked along the first axis.
    coords : ndarray of float, shape (3, n)
        Fractional (i, j, k) index coordinates of the ``n`` sample points.
        Each coordinate ``c`` is truncated to ``int16`` to get the lower
        cell corner, and the corner one index higher is read as well.  No
        bounds handling is done, so callers are expected to pass
        ``0 <= c < size - 1`` along each axis (and indices that fit int16).

    Returns
    -------
    ndarray of float64, shape (m, n)
        Interpolated value of every input array at every sample point.
    """
    # Work on the transposed coordinates so that each point is one row (n, 3).
    ijk = coords.T.astype(np.int16)  # lower corner of the enclosing cell
    fijk = (coords.T - ijk).astype(np.float32)  # fractional offset in the cell
    n = ijk.shape[0]
    m = ars.shape[0]
    out = np.empty((n, m), dtype=np.float64)

    # Each point writes only its own row of ``out``, so iterations are
    # independent of one another.
    for p in nb.prange(n):
        i0 = ijk[p, 0]
        j0 = ijk[p, 1]
        k0 = ijk[p, 2]
        # Note: don't write i1, j1, k1 = ijk[p, :3]+1 -- much slower.
        i1, j1, k1 = i0 + 1, j0 + 1, k0 + 1
        # Weight of the upper corner along each axis; the lower corner gets
        # the complement.
        fi1 = fijk[p, 0]
        fj1 = fijk[p, 1]
        fk1 = fijk[p, 2]
        fi0, fj0, fk0 = 1 - fi1, 1 - fj1, 1 - fk1
        # Scalar loop over the m arrays instead of a vectorised slice.
        for a in range(m):
            out[p, a] = (
                fi0 * fj0 * fk0 * ars[a, i0, j0, k0]
                + fi0 * fj0 * fk1 * ars[a, i0, j0, k1]
                + fi0 * fj1 * fk0 * ars[a, i0, j1, k0]
                + fi0 * fj1 * fk1 * ars[a, i0, j1, k1]
                + fi1 * fj0 * fk0 * ars[a, i1, j0, k0]
                + fi1 * fj0 * fk1 * ars[a, i1, j0, k1]
                + fi1 * fj1 * fk0 * ars[a, i1, j1, k0]
                + fi1 * fj1 * fk1 * ars[a, i1, j1, k1]
            )
    # Transpose back so the result is indexed (array, point).
    return out.T
# Call both implementations on the same inputs (fgh and coords are expected
# to be defined earlier in the session).
out_1 = mymap(fgh, coords)
out_2 = mymap_opt(fgh, coords)
# The benchmarks below use the IPython/Jupyter ``%timeit`` magic, so these
# lines only run in an IPython session, not as plain Python. The comment
# after each command is the recorded output.
%timeit out = mymap(fgh, coords)
#1.09 s ± 13.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit out = mymap_opt(fgh, coords)
#144 ms ± 5.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# NOTE(review): the next timing has no visible command of its own --
# presumably a second mymap_opt configuration; confirm against the original.
#259 ms ± 4.76 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)