I have a perceptron (the fully connected layers of my CNN) with 10 outputs. Of these outputs there are always one or two large ones, while the rest are close to zero. I use ReLU as the activation and softmax for the output probabilities. Some examples of these strange outputs (three runs of 10 values each):
First run:
- 0.03676021
- 0.1569262
- 99.48537
- 0.03676021
- 0.03676021
- 0.03676021
- 0.03676021
- 0.10039
- 0.03676021
- 0.03676021

Second run:
- 99.9898
- 5.67829E-11
- 3.183056E-11
- 9.487049E-12
- 0.004471419
- 4.597222E-11
- 0.005729798
- 5.412427E-11
- 1.847427E-12
- 6.115809E-09

Third run:
- 0.06430105
- 0.06430105
- 0.06430105
- 98.25629
- 0.06430105
- 0.9340076
- 0.06430105
- 0.06430105
- 0.3595946
- 0.06430105
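I wonder whether the repeated small values are simply what softmax produces when most logits have been zeroed by ReLU: every zero logit contributes exp(0) = 1, so they all map to the same probability. A minimal standalone sketch (the logit values are hypothetical, not taken from my network) that reproduces the pattern:

using System;

class SoftmaxDemo
{
    static void Main()
    {
        // Hypothetical logits: eight zeroed by ReLU, two survivors.
        float[] logits = { 0f, 0f, 7.9f, 0f, 0f, 0f, 0f, 1f, 0f, 0f };
        float sum = 0f;
        foreach (float l in logits)
            sum += (float)Math.Exp(l);
        for (int i = 0; i < logits.Length; i++)
            // Printed as percentages; all zero logits share the same value (~0.037 here).
            Console.WriteLine((float)Math.Exp(logits[i]) / sum * 100);
    }
}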
You can also see that many of the values are exactly identical. Here is my perceptron code:
biasesWeights = new float[3]; // one shared bias weight per layer
for (int i = 0; i < 3; i++)
{
    biasesWeights[i] = UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f, 0.5f) * 100) / 100f;
}
neurons = InitializationJaggedArr(new float[4][], 4, 512, 256, 32, 10);  // layer sizes 512 -> 256 -> 32 -> 10
weights = InitializationJaggedArr(new float[3][,], 3, 512, 256, 32, 10); // one weight matrix per layer pair
}
private float[][] InitializationJaggedArr(float[][] arr, int length, params int[] x)
{
    // Allocate one 1-D array per layer, sized by the corresponding entry of x.
    for (int i = 0; i < length; i++)
    {
        arr[i] = new float[x[i]];
    }
    return arr;
}
private float[][,] InitializationJaggedArr(float[][,] arr, int length, params int[] x)
{
    // Allocate one weight matrix per pair of consecutive layer sizes.
    for (int i = 0; i < length; i++)
    {
        arr[i] = new float[x[i], x[i + 1]];
    }
    return arr;
}
public void RandomInitializationOfWeights()
{
    // Fill every weight matrix (512x256, 256x32, 32x10) with values in [-0.5, 0.5]
    // rounded to two decimals; re-roll once if a value comes out exactly zero
    // (the re-roll can, in principle, still produce zero).
    for (int l = 0; l < weights.Length; l++)
    {
        for (int i = 0; i < weights[l].GetLength(0); i++)
        {
            for (int j = 0; j < weights[l].GetLength(1); j++)
            {
                float val = RoundedRandomWeight();
                if (val == 0)
                    val = RoundedRandomWeight();
                weights[l][i, j] = val;
            }
        }
    }
}
private float RoundedRandomWeight()
{
    return UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f, 0.5f) * 100) / 100f;
}
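In case the initialization matters here: my weights are uniform in [-0.5, 0.5], rounded to two decimals. For comparison, a He-style initialization sketch (an alternative I have not used yet; HeInitialize is just an illustrative name, and the uniform range is a simplification of the usual Gaussian version):

// Sketch: He-style initialization for one weight matrix (assumed alternative,
// not my current code). Scales the range by sqrt(2 / fanIn), as recommended for ReLU.
private void HeInitialize(float[,] w)
{
    int fanIn = w.GetLength(0);
    float std = UnityEngine.Mathf.Sqrt(2f / fanIn);
    for (int i = 0; i < w.GetLength(0); i++)
        for (int j = 0; j < w.GetLength(1); j++)
            w[i, j] = UnityEngine.Random.Range(-std, std);
}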
private float ReLU(float val)
{
if (val > 0)
{
return val;
}
else
{
return 0;
}
}
private float[] Softmax(float[] arr)
{
    // Note: Math.Exp can overflow to Infinity for large logits
    // (see the stable variant sketched after this class).
    float[] results = new float[arr.Length];
    float val = 0;
    for (int i = 0; i < arr.Length; i++)
    {
        val += (float)Math.Exp(arr[i]);
    }
    for (int i = 0; i < arr.Length; i++)
    {
        results[i] = (float)Math.Exp(arr[i]) / val;
    }
    return results;
}
public float[] FeedForward(float[] inputArr)
{
    neurons[0] = inputArr;
    for (int l = 1; l < neurons.Length; l++)
    {
        for (int j = 0; j < neurons[l].Length; j++)
        {
            // Reset the accumulator so repeated FeedForward calls don't stack old sums.
            neurons[l][j] = 0;
            for (int i = 0; i < neurons[l - 1].Length; i++)
            {
                neurons[l][j] += neurons[l - 1][i] * weights[l - 1][i, j];
            }
            neurons[l][j] += biasesWeights[l - 1];
            // Note: ReLU is applied to every layer, including the final one before Softmax.
            neurons[l][j] = ReLU(neurons[l][j]);
        }
    }
    return Softmax(neurons[3]);
}
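In case the exponentials are part of the problem, here is a numerically stable softmax variant I could swap in (a sketch; StableSoftmax is my own name for it). It subtracts the maximum logit before exponentiating, which gives the same result mathematically but keeps Math.Exp from overflowing:

// Sketch: numerically stable softmax (subtract the max logit before exponentiating).
private float[] StableSoftmax(float[] arr)
{
    float max = arr[0];
    for (int i = 1; i < arr.Length; i++)
        if (arr[i] > max) max = arr[i];
    float sum = 0f;
    float[] results = new float[arr.Length];
    for (int i = 0; i < arr.Length; i++)
    {
        results[i] = (float)Math.Exp(arr[i] - max); // largest exponent is exp(0) = 1
        sum += results[i];
    }
    for (int i = 0; i < arr.Length; i++)
        results[i] /= sum;
    return results;
}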
And here is the code for the convolutional layer:
private static float[][][,] filters;
public ConvolutinalLayer()
{
    // Two filter banks (one per conv layer), 32 filters each, 5x5.
    filters = new float[2][][,];
    filters = InitiaizationJaggedMatrixArr(filters, 5);
}
public void RandomSetFilters()
{
for (int i = 0; i < 2; i++)
{
for (int j = 0; j < 32; j++)
{
for (int o = 0; o < 5; o++)
{
for (int h = 0; h < 5; h++)
{
filters[i][j][o, h] = (float)UnityEngine.Mathf.RoundToInt(UnityEngine.Random.Range(-0.5f, 0.5f) * 100) / 100;
}
}
}
}
}
public float[] FeedForward(DigitImage image)
{
    float[][,] arrImage = new float[1][,];
    arrImage = InitiaizationJaggedMatrixArr(arrImage, 28);
    for (int i = 0; i < 28; i++)
    {
        for (int j = 0; j < 28; j++)
        {
            // Normalize the pixel from [0, 255] to [-1, 1]; the floating-point literals
            // guard against integer division (which truncates everything to 0 or 1
            // when the pixels are byte/int values).
            arrImage[0][i, j] = (image.pixels[i][j] * 2f / 255f) - 1f;
        }
    }
    float[][,] conv1 = ConvolutionalLayer(arrImage, 32, 24, 0); // 32 x 24 x 24
    float[][,] active1 = ActivationLayer(conv1, 32, 24);
    float[][,] pool1 = PoolingLayer(active1, 32, 12);           // 32 x 12 x 12
    float[][,] conv2 = ConvolutionalLayer(pool1, 32, 8, 1);     // 32 x 8 x 8
    float[][,] active2 = ActivationLayer(conv2, 32, 8);
    float[][,] pool2 = PoolingLayer(active2, 32, 4);            // 32 x 4 x 4
    return FlattingLayer(pool2, 32, 4);                         // 512 values
}
private float[][,] InitiaizationJaggedMatrixArr(float[][,] arr, int scale)
{
for (int i = 0; i < arr.Length; i++)
{
arr[i] = new float[scale, scale];
}
return arr;
}
private float[][][,] InitiaizationJaggedMatrixArr(float[][][,] arr, int scale)
{
arr[0] = new float[32][,];
arr[1] = new float[32][,];
for (int i = 0; i < 2; i++)
{
for (int j = 0; j < 32; j++)
{
arr[i][j] = new float[scale, scale];
}
}
return arr;
}
private float[][,] ConvolutionalLayer(float[][,] layer, int depthOutput, int scale, int numFilter)
{
    float[][,] arr = new float[depthOutput][,];
    arr = InitiaizationJaggedMatrixArr(arr, scale);
    for (int h = 0; h < depthOutput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                float val = 0;
                // Slide the 5x5 filter. The first bank (numFilter == 0) reads the single
                // input channel; the second bank convolves channel h with filter h only,
                // i.e. depthwise, without summing across input channels.
                for (int o = 0; o < 5; o++)
                {
                    for (int e = 0; e < 5; e++)
                    {
                        int c = numFilter == 0 ? 0 : h; // source channel
                        val += layer[c][i + o, j + e] * filters[numFilter][h][o, e];
                    }
                }
                arr[h][i, j] = val;
            }
        }
    }
    return arr;
}
private float[][,] ConvolutionalLayer(float[][,] layer, int depthOutput, int scale, int numFilter, int zeroPadding)
{
    // Overload meant for zero padding; note that zeroPadding is not actually applied yet.
    float[][,] arr = new float[depthOutput][,];
    arr = InitiaizationJaggedMatrixArr(arr, scale);
    for (int h = 0; h < depthOutput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                float val = 0;
                // The kernel size must come from the inner 5x5 matrix; filters.GetLength(2)
                // throws because the outer filters array is one-dimensional (jagged).
                for (int o = 0; o < filters[numFilter][h].GetLength(0); o++)
                {
                    for (int e = 0; e < filters[numFilter][h].GetLength(1); e++)
                    {
                        int c = numFilter == 0 ? 0 : h; // source channel
                        val += layer[c][i + o, j + e] * filters[numFilter][h][o, e];
                    }
                }
                arr[h][i, j] = val;
            }
        }
    }
    return arr;
}
private float[][,] ActivationLayer(float[][,] layer, int depthOutput, int scale)
{
float[][,] arr = new float[depthOutput][,];
arr = InitiaizationJaggedMatrixArr(arr, scale);
for (int h = 0; h < depthOutput; h++)
{
for (int i = 0; i < scale; i++)
{
for (int j = 0; j < scale; j++)
{
arr[h][i, j] = ReLU(layer[h][i, j]);
}
}
}
return arr;
}
private float ReLU(float val)
{
if (val > 0)
{
return val;
}
else
{
return 0;
}
}
private float[][,] PoolingLayer(float[][,] layer, int depthOutput, int scale)
{
    // 2x2 max pooling; scale is the OUTPUT side length, so the input side is scale * 2.
    float[][,] arr = new float[depthOutput][,];
    arr = InitiaizationJaggedMatrixArr(arr, scale);
    for (int h = 0; h < depthOutput; h++)
    {
        // Iterate over the full input (scale * 2); stopping at scale left
        // three quarters of every pooled map filled with zeros.
        for (int i = 0; i < scale * 2; i = i + 2)
        {
            for (int j = 0; j < scale * 2; j = j + 2)
            {
                float val = 0; // inputs are post-ReLU, so 0 is a safe initial max
                for (int o = 0; o < 2; o++)
                {
                    for (int e = 0; e < 2; e++)
                    {
                        if (val < layer[h][i + o, j + e])
                        {
                            val = layer[h][i + o, j + e];
                        }
                    }
                }
                arr[h][i / 2, j / 2] = val;
            }
        }
    }
    return arr;
}
private float[] FlattingLayer(float[][,] layer, int depthInput, int scale)
{
    // Flatten the 32 x 4 x 4 feature maps into a single 512-element vector.
    float[] arr = new float[depthInput * scale * scale];
    int p = 0;
    for (int h = 0; h < depthInput; h++)
    {
        for (int i = 0; i < scale; i++)
        {
            for (int j = 0; j < scale; j++)
            {
                arr[p] = layer[h][i, j];
                p++;
            }
        }
    }
    return arr;
}
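For completeness, this is roughly how I wire the two parts together (a sketch; the Perceptron class name and the variable names are illustrative, not my exact code):

// Hypothetical wiring; the shapes follow the pipeline comments above.
var conv = new ConvolutinalLayer();
conv.RandomSetFilters();
var perceptron = new Perceptron();          // illustrative class name
perceptron.RandomInitializationOfWeights();
float[] features = conv.FeedForward(image);               // 512 values
float[] probabilities = perceptron.FeedForward(features); // 10 values, should sum to 1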
So, is this actually a problem, and if it is, how do I solve it? PS: I haven't trained the CNN yet.