I've written a backpropagation neural network in Python, using NumPy for the matrix computations and batch updates. It learns binary functions like XOR just fine, but when I train it one-vs-all on the iris dataset (from sklearn.datasets; my target function is y = iris.target == 1), it ends up putting all 1s or all -1s on the output. I've tried learning rates in [0.01, 5], hidden layer sizes of [3, 20] nodes, and up to 50k epochs, with no improvement.
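For context, the setup looks roughly like this (the class name NN and the exact sizes and hyperparameters shown are illustrative, not my real values):

import numpy as np
from sklearn.datasets import load_iris

iris = load_iris()
y = iris.target == 1                          # one-vs-all target for class 1
net = NN(n_input=4, n_hidden=8, n_output=1)   # 4 iris features; sizes illustrative
net.train(iris.data, y, alpha=0.1, epochs=10000)
predictions = net.predict(iris.data)          # comes back all 1s or all -1s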
In the code below, _sigmoid is numpy's tanh function and _dsigmoid is its derivative. I'd really appreciate any help!
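For concreteness, the two helpers amount to this (a sketch, since I haven't pasted them; I'm assuming _dsigmoid is expressed in terms of the activation, because that's what back_propagate passes in):

def _sigmoid(self, x):
    # numpy's tanh, squashing values into (-1, 1)
    return np.tanh(x)

def _dsigmoid(self, a):
    # tanh's derivative written in terms of the activation a = tanh(x)
    return 1.0 - a**2

Below is the important code from the network: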
def __init__(self, n_input, n_hidden, n_output):
    self.n_input = n_input + 1  # +1 for the bias unit on the input layer
    self.n_hidden = n_hidden
    self.n_output = n_output
    # small random initial weights for input->hidden and hidden->output
    self.w1 = np.random.normal(scale=0.7, size=(self.n_input * self.n_hidden)).reshape(self.n_input, self.n_hidden)
    self.w2 = np.random.normal(scale=0.7, size=(self.n_hidden * self.n_output)).reshape(self.n_hidden, self.n_output)
    self.output_activation = np.zeros(n_output)
    self.hidden_activation = np.zeros(n_hidden)
    self.input_activation = np.zeros(n_input)

def feed_forward(self):
    """
    Update output vector created by feed-forward propagation of input activations
    """
    self.hidden_activation = self._sigmoid(np.dot(self.input_activation, self.w1))
    self.output_activation = self._sigmoid(np.dot(self.hidden_activation, self.w2))

def back_propagate(self, target, alpha):
    # deltas via the chain rule; _dsigmoid takes activations, not pre-activations
    output_error = target - self.output_activation
    output_delta = output_error * self._dsigmoid(self.output_activation)
    hidden_error = np.dot(output_delta, self.w2.T)
    hidden_delta = hidden_error * self._dsigmoid(self.hidden_activation)
    # batch weight updates
    self.w2 += alpha * np.dot(self.hidden_activation.T, output_delta)
    self.w1 += alpha * np.dot(self.input_activation.T, hidden_delta)

def train(self, data, target, alpha, epochs=50):
    m = data.shape[0]
    # add bias column to the input
    X = np.ones((m, self.n_input))
    X[:, 1:] = data
    # turn target into a column vector
    target = target[:, np.newaxis]
    for epoch in range(epochs):
        self.input_activation = X
        self.feed_forward()
        self.back_propagate(target, alpha)

def predict(self, data):
    # same bias trick as in train, then a single forward pass
    m = data.shape[0]
    self.input_activation = np.ones((m, self.n_input))
    self.input_activation[:, 1:] = data
    self.feed_forward()
    return self.output_activation
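For completeness, the updates I believe back_propagate implements are the standard batch gradient steps for squared error, with the derivative evaluated at the activation (since tanh'(x) = 1 - tanh(x)^2):

delta_out = (t - a_out) ⊙ σ'(a_out)
delta_hid = (delta_out · W2ᵀ) ⊙ σ'(a_hid)
W2 ← W2 + α · a_hidᵀ · delta_out
W1 ← W1 + α · Xᵀ · delta_hid

with σ'(a) = 1 - a² for tanh.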