I have a perceptron (the fully connected layers of my CNN) with 10 outputs. Of these outputs there are always one or two large ones, while the rest are close to zero. I use ReLU as the activation and softmax for the output probabilities. Some examples of these strange outputs (three runs of 10 values each):
First run:
- 0.03676021
- 0.1569262
- 99.48537
- 0.03676021
- 0.03676021
- 0.03676021
- 0.03676021
- 0.10039
- 0.03676021
- 0.03676021

Second run:
- 99.9898
- 5.67829E-11
- 3.183056E-11
- 9.487049E-12
- 0.004471419
- 4.597222E-11
- 0.005729798
- 5.412427E-11
- 1.847427E-12
- 6.115809E-09

Third run:
- 0.06430105
- 0.06430105
- 0.06430105
- 98.25629
- 0.06430105
- 0.9340076
- 0.06430105
- 0.06430105
- 0.3595946
- 0.06430105
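I wonder whether the repeated small values are simply what softmax produces when most logits have been zeroed by ReLU: every zero logit contributes exp(0) = 1, so they all map to the same probability. A minimal standalone sketch (the logit values are hypothetical, not taken from my network) that reproduces the pattern:

using System;

class SoftmaxDemo
{
    static void Main()
    {
        // Hypothetical logits: eight zeroed by ReLU, two survivors.
        float[] logits = { 0f, 0f, 7.9f, 0f, 0f, 0f, 0f, 1f, 0f, 0f };
        float sum = 0f;
        foreach (float l in logits)
            sum += (float)Math.Exp(l);
        for (int i = 0; i < logits.Length; i++)
            // Printed as percentages; all zero logits share the same value (~0.037 here).
            Console.WriteLine((float)Math.Exp(logits[i]) / sum * 100);
    }
}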
You can also see that many of the values are exactly identical. Here is my perceptron code:
biasesWeights = new float[3]; // one shared bias weight per layer
for (int i = 0; i < 3; i++)
{
    biasesWeights[i] = UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f, 0.5f) * 100) / 100f;
}
neurons = InitializationJaggedArr(new float[4][], 4, 512, 256, 32, 10);  // layer sizes 512 -> 256 -> 32 -> 10
weights = InitializationJaggedArr(new float[3][,], 3, 512, 256, 32, 10); // one weight matrix per layer pair
}
private float[][] InitializationJaggedArr(float[][] arr, int length, params int[] x)
{
    // Allocate one 1-D array per layer, sized by the corresponding entry of x.
    for (int i = 0; i < length; i++)
    {
        arr[i] = new float[x[i]];
    }
    return arr;
}
private float[][,] InitializationJaggedArr(float[][,] arr, int length, params int[] x)
{
    // Allocate one weight matrix per pair of consecutive layer sizes.
    for (int i = 0; i < length; i++)
    {
        arr[i] = new float[x[i], x[i + 1]];
    }
    return arr;
}
public void RandomInitializationOfWeights()
{
    // Fill every weight matrix (512x256, 256x32, 32x10) with values in [-0.5, 0.5]
    // rounded to two decimals; re-roll once if a value comes out exactly zero
    // (the re-roll can, in principle, still produce zero).
    for (int l = 0; l < weights.Length; l++)
    {
        for (int i = 0; i < weights[l].GetLength(0); i++)
        {
            for (int j = 0; j < weights[l].GetLength(1); j++)
            {
                float val = RoundedRandomWeight();
                if (val == 0)
                    val = RoundedRandomWeight();
                weights[l][i, j] = val;
            }
        }
    }
}
private float RoundedRandomWeight()
{
    return UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f, 0.5f) * 100) / 100f;
}
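In case the initialization matters here: my weights are uniform in [-0.5, 0.5], rounded to two decimals. For comparison, a He-style initialization sketch (an alternative I have not used yet; HeInitialize is just an illustrative name, and the uniform range is a simplification of the usual Gaussian version):

// Sketch: He-style initialization for one weight matrix (assumed alternative,
// not my current code). Scales the range by sqrt(2 / fanIn), as recommended for ReLU.
private void HeInitialize(float[,] w)
{
    int fanIn = w.GetLength(0);
    float std = UnityEngine.Mathf.Sqrt(2f / fanIn);
    for (int i = 0; i < w.GetLength(0); i++)
        for (int j = 0; j < w.GetLength(1); j++)
            w[i, j] = UnityEngine.Random.Range(-std, std);
}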
private float ReLU(float val)
{
if (val > 0)
{
return val;
}
else
{
return 0;
}
}
private float[] Softmax(float[] arr)
{
    // Note: Math.Exp can overflow to Infinity for large logits
    // (see the stable variant sketched after this class).
    float[] results = new float[arr.Length];
    float val = 0;
    for (int i = 0; i < arr.Length; i++)
    {
        val += (float)Math.Exp(arr[i]);
    }
    for (int i = 0; i < arr.Length; i++)
    {
        results[i] = (float)Math.Exp(arr[i]) / val;
    }
    return results;
}
public float[] FeedForward(float[] inputArr)
{
    neurons[0] = inputArr;
    for (int l = 1; l < neurons.Length; l++)
    {
        for (int j = 0; j < neurons[l].Length; j++)
        {
            // Reset the accumulator so repeated FeedForward calls don't stack old sums.
            neurons[l][j] = 0;
            for (int i = 0; i < neurons[l - 1].Length; i++)
            {
                neurons[l][j] += neurons[l - 1][i] * weights[l - 1][i, j];
            }
            neurons[l][j] += biasesWeights[l - 1];
            // Note: ReLU is applied to every layer, including the final one before Softmax.
            neurons[l][j] = ReLU(neurons[l][j]);
        }
    }
    return Softmax(neurons[3]);
}
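In case the exponentials are part of the problem, here is a numerically stable softmax variant I could swap in (a sketch; StableSoftmax is my own name for it). It subtracts the maximum logit before exponentiating, which gives the same result mathematically but keeps Math.Exp from overflowing:

// Sketch: numerically stable softmax (subtract the max logit before exponentiating).
private float[] StableSoftmax(float[] arr)
{
    float max = arr[0];
    for (int i = 1; i < arr.Length; i++)
        if (arr[i] > max) max = arr[i];
    float sum = 0f;
    float[] results = new float[arr.Length];
    for (int i = 0; i < arr.Length; i++)
    {
        results[i] = (float)Math.Exp(arr[i] - max); // largest exponent is exp(0) = 1
        sum += results[i];
    }
    for (int i = 0; i < arr.Length; i++)
        results[i] /= sum;
    return results;
}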
And here is the code for the convolutional layer:
private static float[][][,] filters;
public ConvolutinalLayer()
{
    // Two filter banks (one per conv layer), 32 filters each, 5x5.
    filters = new float[2][][,];
    filters = InitiaizationJaggedMatrixArr(filters, 5);
}
public void RandomSetFilters()
{
for (int i = 0; i < 2; i++)
{
for (int j = 0; j < 32; j++)
{
for (int o = 0; o < 5; o++)
{
for (int h = 0; h < 5; h++)
{
filters[i][j][o, h] = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f, 0.5f) * 100) / 100;
}
}
}
}
}
public float[] FeedForward(DigitImage image)
{
    float[][,] arrImage = new float[1][,];
    arrImage = InitiaizationJaggedMatrixArr(arrImage, 28);
    for (int i = 0; i < 28; i++)
    {
        for (int j = 0; j < 28; j++)
        {
            // Normalize the pixel from [0, 255] to [-1, 1]; the floating-point literals
            // guard against integer division (which truncates everything to 0 or 1
            // when the pixels are byte/int values).
            arrImage[0][i, j] = (image.pixels[i][j] * 2f / 255f) - 1f;
        }
    }
    float[][,] conv1 = ConvolutionalLayer(arrImage, 32, 24, 0); // 32 x 24 x 24
    float[][,] active1 = ActivationLayer(conv1, 32, 24);
    float[][,] pool1 = PoolingLayer(active1, 32, 12);           // 32 x 12 x 12
    float[][,] conv2 = ConvolutionalLayer(pool1, 32, 8, 1);     // 32 x 8 x 8
    float[][,] active2 = ActivationLayer(conv2, 32, 8);
    float[][,] pool2 = PoolingLayer(active2, 32, 4);            // 32 x 4 x 4
    return FlattingLayer(pool2, 32, 4);                         // 512 values
}
private float[][,] InitiaizationJaggedMatrixArr(float[][,] arr, int scale)
{
for (int i = 0; i < arr.Length; i++)
{
arr[i] = new float[scale, scale];
}
return arr;
}
private float[][][,] InitiaizationJaggedMatrixArr(float[][][,] arr, int scale)
{
arr[0] = new float[32][,];
arr[1] = new float[32][,];
for (int i = 0; i < 2; i++)
{
for (int j = 0; j < 32; j++)
{
arr[i][j] = new float[scale, scale];
}
}
return arr;
}
private float[][,] ConvolutionalLayer(float[][,] layer, int depthOutput, int scale, int numFilter)
{
    float[][,] arr = new float[depthOutput][,];
    arr = InitiaizationJaggedMatrixArr(arr, scale);
    for (int h = 0; h < depthOutput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                float val = 0;
                // Slide the 5x5 filter. The first bank (numFilter == 0) reads the single
                // input channel; the second bank convolves channel h with filter h only,
                // i.e. depthwise, without summing across input channels.
                for (int o = 0; o < 5; o++)
                {
                    for (int e = 0; e < 5; e++)
                    {
                        int c = numFilter == 0 ? 0 : h; // source channel
                        val += layer[c][i + o, j + e] * filters[numFilter][h][o, e];
                    }
                }
                arr[h][i, j] = val;
            }
        }
    }
    return arr;
}
private float[][,] ConvolutionalLayer(float[][,] layer, int depthOutput, int scale, int numFilter, int zeroPadding)
{
    // Overload meant for zero padding; note that zeroPadding is not actually applied yet.
    float[][,] arr = new float[depthOutput][,];
    arr = InitiaizationJaggedMatrixArr(arr, scale);
    for (int h = 0; h < depthOutput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                float val = 0;
                // The kernel size must come from the inner 5x5 matrix; filters.GetLength(2)
                // throws because the outer filters array is one-dimensional (jagged).
                for (int o = 0; o < filters[numFilter][h].GetLength(0); o++)
                {
                    for (int e = 0; e < filters[numFilter][h].GetLength(1); e++)
                    {
                        int c = numFilter == 0 ? 0 : h; // source channel
                        val += layer[c][i + o, j + e] * filters[numFilter][h][o, e];
                    }
                }
                arr[h][i, j] = val;
            }
        }
    }
    return arr;
}
private float[][,] ActivationLayer(float[][,] layer, int depthOutput, int scale)
{
float[][,] arr = new float[depthOutput][,];
arr = InitiaizationJaggedMatrixArr(arr, scale);
for (int h = 0; h < depthOutput; h++)
{
for (int i = 0; i < scale; i++)
{
for (int j = 0; j < scale; j++)
{
arr[h][i, j] = ReLU(layer[h][i, j]);
}
}
}
return arr;
}
private float ReLU(float val)
{
if (val > 0)
{
return val;
}
else
{
return 0;
}
}
private float[][,] PoolingLayer(float[][,] layer, int depthOutput, int scale)
{
    // 2x2 max pooling; scale is the OUTPUT side length, so the input side is scale * 2.
    float[][,] arr = new float[depthOutput][,];
    arr = InitiaizationJaggedMatrixArr(arr, scale);
    for (int h = 0; h < depthOutput; h++)
    {
        // Iterate over the full input (scale * 2); stopping at scale left
        // three quarters of every pooled map filled with zeros.
        for (int i = 0; i < scale * 2; i = i + 2)
        {
            for (int j = 0; j < scale * 2; j = j + 2)
            {
                float val = 0; // inputs are post-ReLU, so 0 is a safe initial max
                for (int o = 0; o < 2; o++)
                {
                    for (int e = 0; e < 2; e++)
                    {
                        if (val < layer[h][i + o, j + e])
                        {
                            val = layer[h][i + o, j + e];
                        }
                    }
                }
                arr[h][i / 2, j / 2] = val;
            }
        }
    }
    return arr;
}
private float[] FlattingLayer(float[][,] layer, int depthInput, int scale)
{
    // Flatten the 32 x 4 x 4 feature maps into a single 512-element vector.
    float[] arr = new float[depthInput * scale * scale];
    int p = 0;
    for (int h = 0; h < depthInput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                arr[p] = layer[h][i, j];
                p++;
            }
        }
    }
    return arr;
}
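For completeness, this is roughly how I wire the two parts together (a sketch; the Perceptron class name and the variable names are illustrative, not my exact code):

// Hypothetical wiring; the shapes follow the pipeline comments above.
var conv = new ConvolutinalLayer();
conv.RandomSetFilters();
var perceptron = new Perceptron();          // illustrative class name
perceptron.RandomInitializationOfWeights();
float[] features = conv.FeedForward(image);               // 512 values
float[] probabilities = perceptron.FeedForward(features); // 10 values, should sum to 1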
So, is this actually a problem, and if it is, how do I solve it? PS: I haven't trained the CNN yet.