When I use the ReLU activation function in my neural network, all the outputs end up the same. With the sigmoid activation function it works just fine. Is there something wrong with my ReLU setup? Why is this happening, and what can I do to fix it?

The example is a simple XOR neural network written in Luau (Roblox Lua; the code uses wait() and compound assignment operators).

math.randomseed(os.time())

local nn =  require("NeuralNetwork")

network = nn.new(2,1,1,4,0.2, "ReLU")

local x=0
local attempts = 100000

for i = 1,attempts do
    
    x+=1
    if x > 10000 then
        wait()
        x=0
    end
    
    network:backPropagate({0, 0}, {0})
    network:backPropagate({1, 0}, {1})
    network:backPropagate({0, 1}, {1})
    network:backPropagate({1, 1}, {0})
end

print("0 0 | "..network:feedForward({0,0})[1])
print("1 0 | "..network:feedForward({1,0})[1])
print("0 1 | "..network:feedForward({0,1})[1])
print("1 1 | "..network:feedForward({1,1})[1])

Output:

0 0 | 0.48780487804878037
1 0 | 0.48780487804878037
0 1 | 0.48780487804878037
1 1 | 0.48780487804878037
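To see whether the hidden ReLU units are still doing anything after training, the hidden layer can be dumped and inspected (a rough diagnostic sketch that relies on the net table the library builds below; network.net[2] is the first hidden layer, and a unit whose netInput stays negative for every input just outputs a constant 0):

for i, neuron in ipairs(network.net[2]) do
    print("hidden "..i.." | netInput = "..neuron.netInput.." | value = "..neuron.value)
end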

- Library -

local nn = {}
nn.__index = nn

nn.ActivationFunctions = {
    sigmoid = function(x) return 1/(1+math.exp(-x)) end,
    ReLU = function(x) return math.max(0, x) end,
}
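-- note: each derivative below is called with the neuron's activated value, not its netInput (see backPropagate)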
nn.Derivatives = {
    sigmoid = function(x) return x * (1 - x) end,
    ReLU = function(x) return x >= 0 and 1 or 0 end,
}
nn.CostFunctions = {
    MSE = function(outputs, expected)
        local sum = 0
        for i = 1, #outputs do
            sum += (expected[i] - outputs[i])^2
        end
        return sum/#outputs
    end,
}

function nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
    
    local self = setmetatable({}, nn)
    
    self.learningRate = learningRate or .3
    self.activationFunction = activationFunction or "ReLU" -- default must match the "ReLU" key above
    self.net = {}
    
    local net = self.net
    local layers = hiddenLayers+2
    
    for i = 1, layers do
        net[i] = {}
    end
    
    for i = 1, inputs do
        net[1][i] = {value = 0}
    end
    for i = 2, layers-1 do
        for x = 1, neurons do
            net[i][x] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
            for z = 1, #net[i-1] do
                net[i][x].weights[z] = math.random()*2-1
            end
        end
    end
    for i = 1, outputs do
        net[layers][i] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
        for z = 1, #net[layers-1] do
            net[layers][i].weights[z] = math.random()*2-1
        end
    end
    
    return self
    
end

function nn.newFromRawData(data)
    
    return setmetatable(data, nn)
    
end

function nn:feedForward(inputs)
    
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    
    local inputLayer = net[1]
    local outputLayer = net[layers]
    
    
    for i = 1, #inputLayer do
        inputLayer[i].value = inputs[i]
    end
    
    for i = 2, layers do
        local layer = net[i]
        for x = 1, #layer do
            local sum = layer[x].bias
            for z = 1, #net[i-1] do
                sum += net[i-1][z].value * layer[x].weights[z]
            end
            layer[x].netInput = sum
            layer[x].value = nn.ActivationFunctions[activation](sum)
        end 
    end
    
    local outputs = {}
    
    for i = 1, #outputLayer do
        table.insert(outputs, outputLayer[i].value)
    end
    
    return outputs
    
end

function nn:backPropagate(inputs, expected)
    
    local outputs = self:feedForward(inputs)
    --print(outputs)
    
    local net = self.net
    local activation = self.activationFunction
    local layers = #net
    local lr = self.learningRate
    
    local inputLayer = net[1]
    local outputLayer = net[layers]
    
    for i = 1, #outputLayer do
        local delta = -(expected[i] - outputs[i]) * nn.Derivatives[activation](outputs[i])
        outputLayer[i].delta = delta    
    end
    
    for i = layers-1, 2, -1 do
        local layer = net[i]
        local nextLayer = net[i+1]
        for x = 1, #layer do
            local delta = 0
            for z = 1, #nextLayer do
                delta += nextLayer[z].delta * nextLayer[z].weights[x]
            end
            layer[x].delta = delta * nn.Derivatives[activation](layer[x].value)
        end
    end
    
    for i = 2, layers do
        local lastLayer = net[i-1]
        for x = 1, #net[i] do
            net[i][x].bias -= lr * net[i][x].delta
            for z = 1, #lastLayer do
                net[i][x].weights[z] -= lr * net[i][x].delta * lastLayer[z].value
            end
        end
    end
    
end

return nn