从问题描述中很难看出期望的结果究竟是什么,但我认为你想要的结果可以很容易地通过下面的方式得到:
# X.T and W[:, dim2, dim3] both have shape (N1, N): multiply elementwise,
# then sum over the N1 axis to get one value per row of X.
y = (X.T * W[:,dim2,dim3]).sum(axis=0)
比较正确性和速度:
import numpy as np
# Synthetic test data. Rows are grouped by dim3, and by dim2 within each dim3
# group; the sorting isn't strictly necessary.
N1, N2, N3 = 67, 43, 91
ni_avg = 1.75  # average number of rows per (dim2, dim3) cell
N = int(ni_avg * N2 * N3)
dim2 = np.random.randint(N2, size=N)
dim3 = np.sort(np.random.randint(N3, size=N))
for d3 in range(N3):
    group = dim3 == d3
    # Boolean-mask indexing returns a copy, so calling .sort() on that copy
    # would leave dim2 unchanged; assign the sorted values back instead.
    dim2[group] = np.sort(dim2[group])
X = np.random.rand(N, N1)
W = np.random.rand(N1, N2, N3)
# original code
def original():
    """Reference (slow) implementation: one dot product per (d2, d3) cell.

    Reads the module-level arrays ``X``, ``W``, ``dim2`` and ``dim3``. For
    every pair ``(d2, d3)`` it selects the rows of ``X`` whose ``dim2`` and
    ``dim3`` entries equal that pair and multiplies them with the weight
    vector ``W[:, d2, d3]``.

    Returns:
        A 1-D array with one entry per row of ``X``. Rows whose index pair
        lies outside ``range(W.shape[1]) x range(W.shape[2])`` are never
        written and keep the arbitrary contents left by ``np.empty``.
    """
    y = np.empty(X.shape[0])
    # The nested loop is kept deliberately: this is the baseline that the
    # vectorized expressions are timed and checked against.
    for d2 in range(W.shape[1]):
        for d3 in range(W.shape[2]):
            mask = (dim3 == d3) & (dim2 == d2)
            curr_X = X[mask, :]
            curr_W = W[:, d2, d3]
            # NumPy is imported as ``np``; the bare name ``numpy`` is not
            # bound and would raise NameError here.
            curr_y = np.dot(curr_X, curr_W)
            y[mask] = curr_y
    return y
# comparison (IPython session: %timeit is an IPython magic, and the comment
# after each statement is the output recorded for it)
%timeit original()
# 1 loops, best of 3: 672 ms per loop
%timeit (X.T * W[:,dim2,dim3]).sum(axis=0)
# 10 loops, best of 3: 31.8 ms per loop
# Correctness check: the loop baseline and the vectorized expression must
# agree up to floating-point tolerance.
np.allclose(original(), np.sum(X.T * W[:,dim2,dim3], axis=0))
# True
如果还想再快一点,可以使用 np.einsum:
# 'ij,ji->i': for each row i of X, sum over j of X[i, j] * W[j, dim2[i], dim3[i]].
y = np.einsum('ij,ji->i', X, W[:,dim2,dim3])