1

我正在尝试使用批量归一化和 Adam 作为优化器来创建神经网络,但在优化期间,出现一个错误,提示操作数不能一起广播。谁能告诉我错误在哪里?

def Adam(W,dW,lr,beta1=0.9,beta=0.999,epsilon=1e-7,k=0):
    m=np.zeros_like(W)
    v=np.zeros_like(W)
    k=k+1
    m=beta1*m+(1-beta1)*dW
    v=beta2*v+(1-beta2)*dW*dW
    hatm=m/(1-np.power(beta1,k))
    hatv=v/(1-np.power(beta2,k))
    return W-lr/np.maximum(np.sqrt(hatv),epsilon)*hatm

nx_train=x_train/255
nx_test=x_test/255

d0=nx_train.shape[1]
d1=200
d2=100
d3=10

np.random.seed(8)
W1=np.random.rand(d0,d1)*0.2-0.1
W2=np.random.rand(d1,d2)*0.2-0.1
W3=np.random.rand(d2,d3)*0.2-0.1
b1=np.zeros(d1)
b2=np.zeros(d2)
b3=np.zeros(d3)
gamma1=np.ones(d1)
beta1=np.zeros(d1)
gamma2=np.ones(d2)
beta2=np.zeros(d2)

lr=0.01
batch_size=100
epoch=100
shuffle=True

y_train=predict(nx_train,W1,b1,gamma1,beta1,W2,b2,gamma2,beta2,W3,b3)
y_test=predict(nx_test,W1,b1,gamma1,beta1,W2,b2,gamma2,beta2,W3,b3)

train_rate,train_err=accuracy_rate(y_train,t_train),cross_entropy_error(y_train,t_train)
test_rate,test_err=accuracy_rate(y_test,t_test),cross_entropy_error(y_test,t_test)
print("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((0), train_rate*100, test_rate*100, train_err, test_err))


for i in range(epoch):
    for j in range(0,nx_train.shape[0],batch_size):
        W1,b1,gamma1,beta1,W2,b2,gamma2,beta2,W3,b3=learn(nx_train[j:j+batch_size],t_train[j:j+batch_size],W1,b1,gamma1,beta1,W2,b2,gamma2,beta2,W3,b3,lr)

    y_train=predict(nx_train,W1,b1,gamma1,beta1,W2,b2,gamma2,beta2,W3,b3)
    y_test=predict(nx_test,W1,b1,gamma1,beta1,W2,b2.gamma2,beta2,W3,b3)


    train_rate,train_err=accuracy_rate(y_train,t_train),cross_entropy_error(y_train,t_train)
    test_rate,test_err=accuracy_rate(y_test,t_test),cross_entropy_error(y_test,t_test)

    test_predictions = np.argmax(y_test,1)
    true_classes = np.argmax(t_test,1)
    cm=confusion_matrix(true_classes, test_predictions)
    print(cm)

    print("{0:3d} train_rate={1:6.2f}% test_rate={2:6.2f}% train_err={3:8.5f} test_err={4:8.5f}".format((i+1), train_rate*100, test_rate*100, train_err, test_err))


错误表示“操作数无法与形状 (100,) (784,200) 一起广播”,位于“v=beta2*v+(1-beta2) dW d”。

4

0 回答 0