My neural network is able to learn the OR table and the XOR table. But when I try to make it learn the AND table, it refuses.
Network configuration:
1 input layer with 2 input neurons
1 hidden layer with 3 neurons
1 output layer with 1 output neuron
9 weights in total: 6 between the input and hidden layers, 3 between the hidden and output layers
Sigmoid is used as the activation function.
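For reference, here is a rough, self-contained sketch of the forward pass this layout implies, using the same initial weights that appear in my init() further down (the class name ForwardSketch and its sigmoid helper are only for illustration, not part of my real class):

    import org.apache.commons.math3.linear.MatrixUtils;
    import org.apache.commons.math3.linear.RealMatrix;

    // Illustrative sketch of the 2-3-1 forward pass, not my actual implementation.
    public class ForwardSketch {
        // element-wise sigmoid over a matrix
        static RealMatrix sigmoid(RealMatrix m) {
            double[][] a = m.getData(); // copy of the entries
            for (int i = 0; i < a.length; i++)
                for (int j = 0; j < a[i].length; j++)
                    a[i][j] = 1.0 / (1.0 + Math.exp(-a[i][j]));
            return MatrixUtils.createRealMatrix(a);
        }

        public static void main(String[] args) {
            // the four truth-table rows, 4 x 2
            RealMatrix input = MatrixUtils.createRealMatrix(new double[][]{{0,0},{0,1},{1,0},{1,1}});
            // 6 weights input -> hidden (2 x 3) and 3 weights hidden -> output (3 x 1)
            RealMatrix inputHidden = MatrixUtils.createRealMatrix(new double[][]{{.8,.4,.3},{.2,.9,.5}});
            RealMatrix hiddenOutput = MatrixUtils.createRealMatrix(new double[][]{{.3},{.5},{.9}});
            RealMatrix hidden = sigmoid(input.multiply(inputHidden));   // 4 x 3
            RealMatrix output = sigmoid(hidden.multiply(hiddenOutput)); // 4 x 1, one value per row
            System.out.println(output);
        }
    }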
With OR or XOR, the values tend to come close to the correct ones, as shown below:
OR
0 0 - 0.0232535024 // ~0
0 1 - 0.9882075648 // ~1
1 0 - 0.9881840412 // ~1
1 1 - 0.9932447649 // ~1
XOR
0 0 - 0.0419020172 // ~0
0 1 - 0.9742653893 // ~1
1 0 - 0.9742622526 // ~1
1 1 - 0.0096044003 // ~0
But when I train it on AND, the first 3 rows tend toward 0, while the last row (1,1) tends toward roughly 0.5 instead of going above 0.5.
AND
0 0 - 0.0007202012 // ~0
0 1 - 0.0151875796 // ~0
1 0 - 0.0128653577 // ~0
1 1 - 0.4987960208 // does not converge; it settles near 0.5, half of 1
Let me know if you need the code and I will post it. I would like to know whether my approach is correct.
Can I apply the same activation function in all of these cases? Why does it approach 0.5, half of 1? Is there something I am misunderstanding conceptually?
I followed https://stevenmiller888.github.io/mind-how-to-build-a-neural-network/ and a few other sources to understand NNs and how to implement one, using Java.
Below is my class:
package com.example;

import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.RealVector;

public class MyNeuralNet {

    float learningRate = .7f;
    int iterations = 10000;
    int hiddenUnits = 3;

    double[][] input = new double[4][2];              // the four truth-table rows
    double[][] knownOp = new double[4][1];            // expected output for each row
    double[][] errorOutputLayer = new double[4][1];
    double[][] inputHiddenArray = new double[2][3];   // weights input -> hidden
    double[][] hiddenOutputArray = new double[3][1];  // weights hidden -> output

    RealMatrix inputHidden;
    RealMatrix hiddenOutput;
    RealMatrix outputSum;
    RealMatrix outPutResult;
    RealMatrix hiddenSum;
    RealMatrix hidderResult;
    public static void main(String[] args) {
        System.out.println("Hello! NeuralNet");
        MyNeuralNet mind = new MyNeuralNet();
        init(mind);
        for (int i = 0; i < mind.iterations; i++) {
            // run forward
            RealMatrix afterFwd = mind.forward();
            loggResult("afterFwd:: " + afterFwd.toString());
            // calculate the difference between expected and obtained
            RealMatrix m = MatrixUtils.createRealMatrix(mind.knownOp);
            RealMatrix n = afterFwd;
            RealMatrix errorOutputLayer = m.subtract(n);
            RealVector errorOutputLayerVector = errorOutputLayer.getColumnVector(0);
            loggCost("errorOutputLayer: " + errorOutputLayer.toString());
            logg(errorOutputLayer.getRowDimension() + " * " + errorOutputLayer.getColumnDimension());
            // back propagate
            mind.backPropagate(errorOutputLayer);
        }
    }
    private static void init(MyNeuralNet mind) {
        // truth-table inputs
        mind.input[0] = new double[]{0, 0};
        mind.input[1] = new double[]{0, 1};
        mind.input[2] = new double[]{1, 0};
        mind.input[3] = new double[]{1, 1};
        // expected outputs (here: AND)
        mind.knownOp[0] = new double[]{0};
        mind.knownOp[1] = new double[]{0};
        mind.knownOp[2] = new double[]{0};
        mind.knownOp[3] = new double[]{1};
        // initial weights, input -> hidden
        mind.inputHiddenArray[0] = new double[]{.8f, .4f, .3f};
        mind.inputHiddenArray[1] = new double[]{.2f, .9f, .5f};
        mind.inputHidden = MatrixUtils.createRealMatrix(mind.inputHiddenArray);
        // initial weights, hidden -> output
        mind.hiddenOutputArray[0] = new double[]{.3f};
        mind.hiddenOutputArray[1] = new double[]{.5f};
        mind.hiddenOutputArray[2] = new double[]{.9f};
        mind.hiddenOutput = MatrixUtils.createRealMatrix(mind.hiddenOutputArray);
    }
    private RealMatrix forward() {
        RealMatrix m = MatrixUtils.createRealMatrix(input);
        // input (4x2) * inputHidden (2x3) = hiddenSum (4x3)
        hiddenSum = m.multiply(inputHidden);
        logg("hiddenSum: " + hiddenSum);
        logg(hiddenSum.getRowDimension() + " * " + hiddenSum.getColumnDimension());
        hidderResult = activate(hiddenSum);
        logg("hidderResult: " + hidderResult);
        logg(hidderResult.getRowDimension() + " * " + hidderResult.getColumnDimension());
        // hidderResult (4x3) * hiddenOutput (3x1) = outputSum (4x1)
        outputSum = hidderResult.multiply(hiddenOutput);
        logg("outputSum: " + outputSum);
        logg(outputSum.getRowDimension() + " * " + outputSum.getColumnDimension());
        outPutResult = activate(outputSum);
        logg("outPutResult:: " + outPutResult.toString());
        logg(outPutResult.getRowDimension() + " * " + outPutResult.getColumnDimension());
        return outPutResult;
    }
    private void backPropagate(RealMatrix errorOutputLayer) {
        // delta for the output layer: error (4x1) element-wise sigmoid'(outputSum) (4x1)
        RealMatrix sigmaPrimeMatrix = activatePrime(outputSum);
        RealMatrix deltaOutputLayerMatrix =
                com.example.MatrixUtils.getElementWiseProduct(sigmaPrimeMatrix, errorOutputLayer);
        logg("deltaOutputLayer1: " + deltaOutputLayerMatrix);
        logg(deltaOutputLayerMatrix.getRowDimension() + " * " + deltaOutputLayerMatrix.getColumnDimension());
        // weight changes hidden -> output: hidderResult^T (3x4) * delta (4x1) = 3x1
        RealMatrix hiddenOutputChanges =
                hidderResult.transpose().multiply(deltaOutputLayerMatrix).scalarMultiply(learningRate);
        logg("hiddenOutputChanges: " + hiddenOutputChanges);
        logg(hiddenOutputChanges.getRowDimension() + " * " + hiddenOutputChanges.getColumnDimension());
        // delta for the hidden layer: (delta * hiddenOutput^T) element-wise sigmoid'(hiddenSum)
        RealMatrix sigmaPrime2Matrix = activatePrime(hiddenSum);
        RealMatrix p2 = deltaOutputLayerMatrix.multiply(hiddenOutput.transpose());
        RealMatrix deltaHiddenLayer = com.example.MatrixUtils.getElementWiseProduct(sigmaPrime2Matrix, p2);
        logg("deltaHiddenLayer: " + deltaHiddenLayer);
        logg(deltaHiddenLayer.getRowDimension() + " * " + deltaHiddenLayer.getColumnDimension());
        // weight changes input -> hidden: input^T (2x4) * deltaHidden (4x3) = 2x3
        RealMatrix inputMatrix = MatrixUtils.createRealMatrix(input);
        RealMatrix inputHiddenChanges =
                inputMatrix.transpose().multiply(deltaHiddenLayer).scalarMultiply(learningRate);
        logg("inputHiddenChanges: " + inputHiddenChanges.toString());
        logg(inputHiddenChanges.getRowDimension() + " * " + inputHiddenChanges.getColumnDimension());
        // apply the weight updates
        hiddenOutput = hiddenOutput.add(hiddenOutputChanges);
        inputHidden = inputHidden.add(inputHiddenChanges);
    }
    // element-wise sigmoid'(x) over a matrix
    private RealMatrix activatePrime(RealMatrix sum) {
        double[][] activatedPrimeArray = new double[sum.getRowDimension()][sum.getColumnDimension()];
        for (int i = 0; i < sum.getRowDimension(); i++) {
            for (int j = 0; j < sum.getColumnDimension(); j++) {
                activatedPrimeArray[i][j] = sigmoidPrime(sum.getEntry(i, j));
            }
        }
        return MatrixUtils.createRealMatrix(activatedPrimeArray);
    }
    // element-wise sigmoid(x) over a matrix
    private RealMatrix activate(RealMatrix sum) {
        double[][] activatedArray = new double[sum.getRowDimension()][sum.getColumnDimension()];
        for (int i = 0; i < sum.getRowDimension(); i++) {
            for (int j = 0; j < sum.getColumnDimension(); j++) {
                activatedArray[i][j] = sigmoid(sum.getEntry(i, j));
            }
        }
        return MatrixUtils.createRealMatrix(activatedArray);
    }
    private static void loggCost(String str) {
        System.out.println("COST:::::::::::::::::::::::" + str);
    }

    private static void loggWeights(String str) {
        System.out.println("Weights:::::::::::::::::::::::" + str);
    }

    private static void loggResult(String s) {
        System.out.println(s);
    }

    public static double sigmoid(double x) {
        return 1 / (1 + Math.exp(-x));
    }

    public static double sigmoidPrime(double x) {
        return sigmoid(x) * (1 - sigmoid(x));
    }

    private static void logg(String str) {
        System.out.println(str);
    }
}
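To make my intent explicit, the update I believe backPropagate should be performing (as I understand the tutorial above) is, in matrix form:

    deltaOutput         = (knownOp - outPutResult) .* sigmoidPrime(outputSum)   // 4 x 1
    hiddenOutputChanges = learningRate * hidderResult' * deltaOutput            // 3 x 1
    deltaHidden         = (deltaOutput * hiddenOutput') .* sigmoidPrime(hiddenSum)  // 4 x 3
    inputHiddenChanges  = learningRate * input' * deltaHidden                   // 2 x 3
    hiddenOutput        = hiddenOutput + hiddenOutputChanges
    inputHidden         = inputHidden + inputHiddenChanges

where .* is the element-wise product and ' is the transpose. If I have misread the tutorial here, that may well be my conceptual mistake.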
Here is an update: if I randomly change the 9 weights, it seems to work fine. For example:
//from .9 to .8
mind.hiddenOutputArray[2] = new double[]{.8f};
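By "randomly change the weights" I mean something like the sketch below: a hypothetical randomizeWeights helper (not in my class above) that could be called instead of the hard-coded assignments in init(); it would also need import java.util.Random;.

    // Hypothetical helper: fill the two weight arrays with small random values
    // instead of the hard-coded ones, then rebuild the matrices.
    private static void randomizeWeights(MyNeuralNet mind) {
        java.util.Random rnd = new java.util.Random();
        for (int i = 0; i < mind.inputHiddenArray.length; i++)
            for (int j = 0; j < mind.inputHiddenArray[i].length; j++)
                mind.inputHiddenArray[i][j] = rnd.nextDouble() - 0.5; // roughly in [-0.5, 0.5)
        for (int i = 0; i < mind.hiddenOutputArray.length; i++)
            mind.hiddenOutputArray[i][0] = rnd.nextDouble() - 0.5;
        mind.inputHidden = MatrixUtils.createRealMatrix(mind.inputHiddenArray);
        mind.hiddenOutput = MatrixUtils.createRealMatrix(mind.hiddenOutputArray);
    }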