
My neural network is able to learn the samples for the OR table and the XOR table, but when I try to get it to learn the AND table, it refuses.

Neural network configuration:

1 input layer with 2 input neurons

1 hidden layer with 3 neurons

1 output layer with 1 result neuron

9 weights in total: 6 between the input and hidden layers, and 3 between the hidden and output layers.

Sigmoid is used as the activation function.
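
For reference, a minimal self-contained sketch of the matrix shapes this configuration implies, using the same Apache Commons Math types as the class posted further down (the class name ShapeSketch and the names X, W1, W2 are illustrative only and are not part of my code):

    import org.apache.commons.math3.linear.MatrixUtils;
    import org.apache.commons.math3.linear.RealMatrix;

    public class ShapeSketch {
        public static void main(String[] args) {
            // 4 samples x 2 inputs (the same truth-table inputs used for training)
            RealMatrix X  = MatrixUtils.createRealMatrix(new double[][]{{0, 0}, {0, 1}, {1, 0}, {1, 1}});
            // 6 weights between input and hidden (2 x 3) and 3 weights between hidden and output (3 x 1)
            RealMatrix W1 = MatrixUtils.createRealMatrix(2, 3);
            RealMatrix W2 = MatrixUtils.createRealMatrix(3, 1);
            // (4 x 2)(2 x 3) = 4 x 3 hidden sums; the sigmoid is then applied element-wise
            RealMatrix hiddenSum = X.multiply(W1);
            // (4 x 3)(3 x 1) = 4 x 1 output sums; the sigmoid gives one prediction per sample
            RealMatrix outputSum = hiddenSum.multiply(W2);
            System.out.println(hiddenSum.getRowDimension() + " x " + hiddenSum.getColumnDimension()); // 4 x 3
            System.out.println(outputSum.getRowDimension() + " x " + outputSum.getColumnDimension()); // 4 x 1
        }
    }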

With OR or XOR, the values converge close to the correct ones, as shown below:

OR

0 0 - 0.0232535024 // ~0

0 1 - 0.9882075648 // ~1

1 0 - 0.9881840412 // ~1

1 1 - 0.9932447649 // ~1

XOR

0 0 - 0.0419020172 // ~0

0 1 - 0.9742653893 // ~1

1 0 - 0.9742622526 // ~1

1 1 - 0.0096044003 // ~0

But when I train it on AND, the first three rows converge towards 0 as expected, while the last row (1,1) settles near 0.5 instead of going above 0.5.

0 0 - 0.0007202012 // ~0

0 1 - 0.0151875796 // ~0

1 0 - 0.0128653577 // ~0

1 1 - 0.4987960208 // does not get close to 1; it settles around 0.5, half of 1

If the code is needed, let me know and I will post it. I would like to know whether my approach is correct.

Can I apply the same activation function in all of these cases? Why does it end up near 0.5, half of 1? Is there something wrong with my conceptual understanding?

I followed https://stevenmiller888.github.io/mind-how-to-build-a-neural-network/ and a few other resources to learn about neural networks and how to implement them. I am using Java.

Below is my class:

package com.example;


import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.RealVector;

public class MyNeuralNet {

float learningRate = .7f;
int iterations = 10000;
int hiddenUnits = 3;

double[][] input = new double[4][2];   // 4 training samples, 2 inputs each
double[][] knownOp = new double[4][1]; // expected output for each sample
double[][] errorOutputLayer = new double[4][1];
double[][] inputHiddenArray = new double[2][3];
double[][] hiddenOutputArray = new double[3][1];

RealMatrix inputHidden;
RealMatrix hiddenOutput;
RealMatrix outputSum;
RealMatrix outPutResult;
RealMatrix hiddenSum;
RealMatrix hidderResult;

public static void main(String[] args){
    System.out.println("Hello! NeuralNet");
    MyNeuralNet mind = new MyNeuralNet();

    init(mind);

    for(int i = 0; i < mind.iterations; i++) {

        //run forward
        RealMatrix afterFwd = mind.forward();

        //calculate the difference between expected and obtained
        loggResult("afterFwd:: "+afterFwd.toString());
        RealMatrix m = MatrixUtils.createRealMatrix(mind.knownOp);
        RealMatrix n = afterFwd;
        // error = m - n (expected minus actual)
        RealMatrix errorOutputLayer = m.subtract(n);
        loggCost("errorOutputLayer: " + errorOutputLayer.toString());
        logg(errorOutputLayer.getRowDimension() + " * " + errorOutputLayer.getColumnDimension());

        //back propagate
        mind.backPropagate(errorOutputLayer);
    }


}

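// training samples and starting weights; knownOp below holds the AND targets, and the weights are hard-coded rather than random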
private static void init(MyNeuralNet mind) {
    mind.input[0] = new double[]{0,0};
    mind.input[1] = new double[]{0,1};
    mind.input[2] = new double[]{1,0};
    mind.input[3] = new double[]{1,1};

    mind.knownOp[0] = new double[]{0};
    mind.knownOp[1] = new double[]{0};
    mind.knownOp[2] = new double[]{0};
    mind.knownOp[3] = new double[]{1};

    mind.inputHiddenArray[0] = new double[]{.8f,.4f,.3f};
    mind.inputHiddenArray[1] = new double[]{.2f,.9f,.5f};

    mind.inputHidden = MatrixUtils.createRealMatrix(mind.inputHiddenArray);

    mind.hiddenOutputArray[0] = new double[]{.3f};
    mind.hiddenOutputArray[1] = new double[]{.5f};
    mind.hiddenOutputArray[2] = new double[]{.9f};

    mind.hiddenOutput = MatrixUtils.createRealMatrix(mind.hiddenOutputArray);
}

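// forward pass: input -> hidden -> output, applying the sigmoid element-wise after each matrix multiplication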
private RealMatrix forward() {

    RealMatrix m = MatrixUtils.createRealMatrix(input);
    // Now multiply m by n
    hiddenSum = m.multiply(inputHidden);
    logg("hiddenSum: "+hiddenSum);
    logg(hiddenSum.getRowDimension() + " * " + hiddenSum.getColumnDimension());

    hidderResult = activate(hiddenSum);
    logg("hidderResult: "+ hidderResult);
    logg(hidderResult.getRowDimension() + " * " + hidderResult.getColumnDimension());

    // Now multiply m by n
    outputSum = hidderResult.multiply(hiddenOutput);
    logg("outputSum: "+ outputSum);
    logg(outputSum.getRowDimension() + " * " + outputSum.getColumnDimension());

    outPutResult = activate(outputSum);
    logg("outPutResult:: "+outPutResult.toString());
    logg(outPutResult.getRowDimension() + " * " + outPutResult.getColumnDimension());

    return outPutResult;


}
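
// back propagation: compute the output-layer and hidden-layer deltas, derive the weight changes, and apply them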
private void backPropagate(RealMatrix errorOutputLayer) {

    // delta for the output layer: sigmoid'(outputSum) multiplied element-wise by the output error
    RealMatrix sigmaPrimeMatrix = activatePrime(outputSum);
    RealMatrix deltaOutputLayerMatrix = com.example.MatrixUtils.getElementWiseProduct(sigmaPrimeMatrix, errorOutputLayer);
    logg("deltaOutputLayer1: " + deltaOutputLayerMatrix);
    logg(deltaOutputLayerMatrix.getRowDimension() + " * " + deltaOutputLayerMatrix.getColumnDimension());

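    // changes for the hidden -> output weights: hidden activations (transposed) times the output delta, scaled by the learning rate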
    RealMatrix hiddenOutputChanges = hidderResult.transpose().multiply(deltaOutputLayerMatrix).scalarMultiply((learningRate));
    logg("hiddenOutputChanges: " + hiddenOutputChanges);
    logg(hiddenOutputChanges.getRowDimension() + " * " + hiddenOutputChanges.getColumnDimension());

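    // propagate the error back to the hidden layer: output delta times the hidden -> output weights (transposed), multiplied element-wise by sigmoid'(hiddenSum)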
    RealMatrix sigmaPrime2Matrix = activatePrime(hiddenSum);
    RealMatrix p2 = deltaOutputLayerMatrix.multiply(hiddenOutput.transpose());
    RealMatrix deltaHiddenLayer = com.example.MatrixUtils.getElementWiseProduct(sigmaPrime2Matrix,p2);
    logg("deltaHiddenLayer: " + deltaHiddenLayer);
    logg(deltaHiddenLayer.getRowDimension() + " * " + deltaHiddenLayer.getColumnDimension());

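    // changes for the input -> hidden weights: inputs (transposed) times the hidden delta, scaled by the learning rate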
    RealMatrix inputMatrix = MatrixUtils.createRealMatrix(input);
    RealMatrix inputHiddenChanges = inputMatrix.transpose().multiply(deltaHiddenLayer).scalarMultiply(learningRate);
    logg("inputHiddenChanges: " + inputHiddenChanges.toString());
    logg(inputHiddenChanges.getRowDimension() + " * " + inputHiddenChanges.getColumnDimension());

    // apply the computed changes to the weight matrices so the next iteration uses the updated weights
    hiddenOutput = hiddenOutput.add(hiddenOutputChanges);
    inputHidden = inputHidden.add(inputHiddenChanges);
}




private RealMatrix activatePrime(RealMatrix sum) {
    // apply the sigmoid derivative element-wise to every entry of the matrix
    double[][] activatedPrimeArray = new double[sum.getRowDimension()][sum.getColumnDimension()];
    for (int i = 0; i < sum.getRowDimension(); i++){
        for (int j = 0; j < sum.getColumnDimension(); j++){
            activatedPrimeArray[i][j] = sigmoidPrime(sum.getEntry(i,j));
        }
    }

    return MatrixUtils.createRealMatrix(activatedPrimeArray);
}

private RealMatrix activate(RealMatrix sum) {
    // apply the sigmoid element-wise to every entry of the matrix
    double[][] activatedArray = new double[sum.getRowDimension()][sum.getColumnDimension()];
    for (int i = 0; i < sum.getRowDimension(); i++){
        for (int j = 0; j < sum.getColumnDimension(); j++){
            activatedArray[i][j] = sigmoid(sum.getEntry(i,j));
        }
    }

    return MatrixUtils.createRealMatrix(activatedArray);
}

private static void loggCost(String str){
    System.out.println("COST:::::::::::::::::::::::" + str);
}

private static void loggWeights(String str){
    System.out.println("Weights:::::::::::::::::::::::" +str);
}

private static void loggResult(String s) {
    System.out.println(s);
}

public static double sigmoid(double x) {
    return 1.0 / (1.0 + Math.exp(-x));
}

public static double sigmoidPrime(double x) {
    return sigmoid(x) * (1 - sigmoid(x));
}

private static void logg(String str){
    System.out.println(str);
}


}

Update: if I randomly change one of the 9 weights, it seems to work fine:

    //from .9 to .8
    mind.hiddenOutputArray[2] = new double[]{.8f};
