
I wrote a backpropagation neural network in Python, using NumPy for the matrix math and batch updates. It learns binary functions like XOR just fine, but when I train it one-vs-all on the iris dataset (in sklearn.datasets), with y = iris.target == 1 as the target, it decides to output all 1s or all -1s. I have tried learning rates in [0.01, 5], hidden layer sizes of [3, 20] nodes, and up to 50k epochs, with no improvement.
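For concreteness, the setup described above amounts to roughly the following (a sketch based on the description, not the asker's exact code):

import sklearn.datasets

iris = sklearn.datasets.load_iris()
data = iris.data        # 150 x 4 feature matrix
y = (iris.target == 1)  # one-vs-all targets for class 1: a boolean 0/1 vector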

Below is the relevant code of the NN. _sigmoid is numpy's tanh function and _dsigmoid is its derivative. I would really appreciate any help!

def __init__(self, n_input, n_hidden, n_output):
    self.n_input = n_input + 1  # +1 for a bias unit folded into the input
    self.n_hidden = n_hidden
    self.n_output = n_output
    # small random initial weights; w1 includes the bias row
    self.w1 = np.random.normal(scale=0.7, size=(self.n_input, self.n_hidden))
    self.w2 = np.random.normal(scale=0.7, size=(self.n_hidden, self.n_output))
    self.output_activation = np.zeros(n_output)
    self.hidden_activation = np.zeros(n_hidden)
    self.input_activation = np.zeros(n_input)

def feed_forward(self):
    """
    Update output vector created by feed-forward propagation of input activations
    """
    self.hidden_activation = self._sigmoid(np.dot(self.input_activation, self.w1))
    self.output_activation = self._sigmoid(np.dot(self.hidden_activation, self.w2))

def back_propagate(self, target, alpha):
    # delta at the output: error times the activation derivative
    output_error = target - self.output_activation
    output_delta = output_error * self._dsigmoid(self.output_activation)

    # propagate the delta back through w2 to get the hidden-layer delta
    hidden_error = np.dot(output_delta, self.w2.T)
    hidden_delta = hidden_error * self._dsigmoid(self.hidden_activation)

    # batch gradient step on both weight matrices
    self.w2 += alpha * np.dot(self.hidden_activation.T, output_delta)
    self.w1 += alpha * np.dot(self.input_activation.T, hidden_delta)

def train(self, data, target, alpha, epochs=50):
    m = data.shape[0]

    # add bias to input
    X = np.ones((m, self.n_input))
    X[:, 1:] = data

    # turn target into a column vector
    target = target[:, np.newaxis]

    for epoch in range(epochs):
        self.input_activation = X
        self.feed_forward()
        self.back_propagate(target, alpha)

def predict(self, data):
    m = data.shape[0]
    self.input_activation = np.ones((m, self.n_input))
    self.input_activation[:, 1:] = data
    self.feed_forward()
    return self.output_activation
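
The two activation helpers are not shown above; from the description they are presumably along these lines, with the derivative written in terms of the activation value rather than the pre-activation:

def _sigmoid(self, x):
    return np.tanh(x)  # numpy's tanh, as described

def _dsigmoid(self, y):
    return 1 - y**2  # d/dx tanh(x) = 1 - tanh(x)^2, expressed via the output y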

1 Answer


This works for me:

import numpy as np
import sklearn.datasets

class NN():
    def __init__(self, n_input, n_hidden, n_output):
        self.n_input = n_input + 1
        self.n_hidden = n_hidden
        self.n_output = n_output
        # small random initial weights; w1 includes the bias row
        self.w1 = np.random.normal(scale=0.7, size=(self.n_input, self.n_hidden))
        self.w2 = np.random.normal(scale=0.7, size=(self.n_hidden, self.n_output))
        self.output_activation = np.zeros(n_output)
        self.hidden_activation = np.zeros(n_hidden)
        self.input_activation = np.zeros(n_input)

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))  # sigmoid
        #return np.tanh(x)  # tanh

    def _dsigmoid(self, x):
        # derivative written in terms of the activation value, not the pre-activation
        return x * (1 - x)  # sigmoid
        #return 1 - x**2  # tanh

    def feed_forward(self):
        """
        Update output vector created by feed-forward propagation of input activations
        """
        self.hidden_activation = self._sigmoid(np.dot(self.input_activation, self.w1))
        self.output_activation = self._sigmoid(np.dot(self.hidden_activation, self.w2))

    def back_propagate(self, target, alpha):
        output_error = target - self.output_activation
        output_delta = output_error * self._dsigmoid(self.output_activation)

        hidden_error = np.dot(output_delta, self.w2.T)
        hidden_delta = hidden_error * self._dsigmoid(self.hidden_activation)

        self.w2 += alpha * (np.dot(self.hidden_activation.T, output_delta))
        self.w1 += alpha * (np.dot(self.input_activation.T, hidden_delta))

    def train(self, data, target, alpha, epochs=50):
        m = data.shape[0]

        # add bias to input
        X = np.ones((m, self.n_input))
        X[:, 1:] = data

        # turn target into a column vector
        target = target[:, np.newaxis]

        for epoch in range(epochs):
            self.input_activation = X
            self.feed_forward()
            self.back_propagate(target, alpha)

    def predict(self, data):
        m = data.shape[0]
        self.input_activation = np.ones((m, self.n_input))
        self.input_activation[:, 1:] = data
        self.feed_forward()
        return self.output_activation

iris = sklearn.datasets.load_iris()
data = iris['data']
# one-vs-all targets: 1 for class 1, 0 for everything else
targets = (iris['target'] == 1).astype(int)

network = NN(4, 3, 1)
network.train(data, targets, 0.01, epochs=10000)
print(network.predict(data))
print(targets)

I changed the activation to the logistic sigmoid, since that makes more sense when your targets lie between 0 and 1. The problem could also be in your data preparation or something else you haven't shared, but that seems unlikely: I couldn't get it to work with tanh either, and the results varied a lot with the number of hidden neurons. I think you should take a hard look at your backpropagation code, and perhaps try gradient checking.
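If you want to try gradient checking, a minimal sketch could look like the helper below. It is hypothetical, not part of the code above; it assumes the loss implied by the update rule, 0.5 * np.sum((target - output)**2), and compares backprop's analytic gradient for w2 against a central-difference estimate:

def gradient_check_w2(net, X, target, eps=1e-5):
    # X must already include the bias column; target must be a column vector
    def loss():
        net.input_activation = X
        net.feed_forward()
        return 0.5 * np.sum((target - net.output_activation) ** 2)

    # analytic gradient of the loss w.r.t. w2 (backprop steps along its negative)
    net.input_activation = X
    net.feed_forward()
    output_delta = (target - net.output_activation) * net._dsigmoid(net.output_activation)
    analytic = -np.dot(net.hidden_activation.T, output_delta)

    # central-difference estimate, one entry of w2 at a time
    numeric = np.zeros_like(net.w2)
    for idx in np.ndindex(*net.w2.shape):
        saved = net.w2[idx]
        net.w2[idx] = saved + eps
        loss_plus = loss()
        net.w2[idx] = saved - eps
        loss_minus = loss()
        net.w2[idx] = saved
        numeric[idx] = (loss_plus - loss_minus) / (2 * eps)

    # relative error should be tiny (around 1e-7) if backprop is correct
    return np.max(np.abs(analytic - numeric) / (np.abs(analytic) + np.abs(numeric) + 1e-12))

A result near machine precision means the w2 gradient is fine; anything around 1e-2 or larger points at a bug.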

Answered 2020-08-26T20:32:37.047