When using the ReLU activation function in my Neural Network I noticed all the outputs were the same. The Sigmoid activation function works just fine. Is there something wrong with the ReLU function? Why is this happening? What can I do to fix this?
The example is a simple XOR Neural Network written in Lua.
math.randomseed(os.time())
local nn = require("NeuralNetwork")
network = nn.new(2,1,1,4,0.2, "ReLU")
local x=0
local attempts = 100000
for i = 1,attempts do
x+=1
if x > 10000 then
wait()
x=0
end
network:backPropagate({0, 0}, {0})
network:backPropagate({1, 0}, {1})
network:backPropagate({0, 1}, {1})
network:backPropagate({1, 1}, {0})
end
print("0 0 | "..network:feedForward({0,0})[1])
print("1 0 | "..network:feedForward({1,0})[1])
print("0 1 | "..network:feedForward({0,1})[1])
print("1 1 | "..network:feedForward({1,1})[1])
Output:
0 0 | 0.48780487804878037
1 0 | 0.48780487804878037
0 1 | 0.48780487804878037
1 1 | 0.48780487804878037
- Library -
local nn = {}
nn.__index = nn
nn.ActivationFunctions = {
sigmoid = function(x) return 1/(1+math.exp(-x)) end,
ReLU = function(x) return math.max(0, x) end,
}
nn.Derivatives = {
sigmoid = function(x) return x * (1 - x) end,
ReLU = function(x) return x >= 0 and 1 or 0 end,
}
nn.CostFunctions = {
MSE = function(outputs, expected)
local sum = 0
for i = 1, #outputs do
sum += (expected[i] - outputs[i])^2
end
return sum/#outputs
end,
}
function nn.new(inputs, outputs, hiddenLayers, neurons, learningRate, activationFunction)
local self = setmetatable({}, nn)
self.learningRate = learningRate or .3
self.activationFunction = activationFunction or "ReLu"
self.net = {}
local net = self.net
local layers = hiddenLayers+2
for i = 1, layers do
net[i] = {}
end
for i = 1, inputs do
net[1][i] = {value = 0}
end
for i = 2, layers-1 do
for x = 1, neurons do
net[i][x] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
for z = 1, #net[i-1] do
net[i][x].weights[z] = math.random()*2-1
end
end
end
for i = 1, outputs do
net[layers][i] = {netInput = 0, value = 0, bias = math.random()*2-1, weights = {}}
for z = 1, #net[layers-1] do
net[layers][i].weights[z] = math.random()*2-1
end
end
return self
end
function nn.newFromRawData(data)
return setmetatable(data, nn)
end
function nn:feedForward(inputs)
local net = self.net
local activation = self.activationFunction
local layers = #net
local inputLayer = net[1]
local outputLayer = net[layers]
for i = 1, #inputLayer do
inputLayer[i].value = inputs[i]
end
for i = 2, layers do
local layer = net[i]
for x = 1, #layer do
local sum = layer[x].bias
for z = 1, #net[i-1] do
sum += net[i-1][z].value * layer[x].weights[z]
end
layer[x].netInput = sum
layer[x].value = nn.ActivationFunctions[activation](sum)
end
end
local outputs = {}
for i = 1, #outputLayer do
table.insert(outputs, outputLayer[i].value)
end
return outputs
end
function nn:backPropagate(inputs, expected)
local outputs = self:feedForward(inputs)
--print(outputs)
local net = self.net
local activation = self.activationFunction
local layers = #net
local lr = self.learningRate
local inputLayer = net[1]
local outputLayer = net[layers]
for i = 1, #outputLayer do
local delta = -(expected[i] - outputs[i]) * nn.Derivatives[activation](outputs[i])
outputLayer[i].delta = delta
end
for i = layers-1, 2, -1 do
local layer = net[i]
local nextLayer = net[i+1]
for x = 1, #layer do
local delta = 0
for z = 1, #nextLayer do
delta += nextLayer[z].delta * nextLayer[z].weights[x]
end
layer[x].delta = delta * nn.Derivatives[activation](layer[x].value)
end
end
for i = 2, layers do
local lastLayer = net[i-1]
for x = 1, #net[i] do
net[i][x].bias -= lr * net[i][x].delta
for z = 1, #lastLayer do
net[i][x].weights[z] -= lr * net[i][x].delta * lastLayer[z].value
end
end
end
end
return nn