I am trying to build an AutoEncoder in DL4J.
Input: 200 integers, ranging from 0 to roughly 40,000.
Model code:
// Stacked-RBM autoencoder: 200 -> 150 -> 100 -> 150 -> 200
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .seed(seed)
    .iterations(ITERATIONS)
    .learningRate(LEARNING_RATE)
    .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT)
    .list()
    // encoder: 200 -> 150 -> 100
    .layer(0, new RBM.Builder().nIn(numRows * numColumns).nOut(150)
        .activation(Activation.RELU)
        .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build())
    .layer(1, new RBM.Builder().nIn(150).nOut(100)
        .activation(Activation.RELU)
        .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build())
    // decoder: 100 -> 150 -> 200
    .layer(2, new RBM.Builder().nIn(100).nOut(150)
        .activation(Activation.RELU)
        .lossFunction(LossFunctions.LossFunction.KL_DIVERGENCE).build())
    .layer(3, new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
        .activation(Activation.RELU)
        .nIn(150).nOut(numRows * numColumns).build())
    .pretrain(true).backprop(true)
    .build();
model = new MultiLayerNetwork(conf);
And the configuration of each layer:
> NeuralNetConfiguration(layer=RBM(super=BasePretrainNetwork(super=FeedForwardLayer(super=Layer(layerName=layer0, activationFn=relu, weightInit=XAVIER, biasInit=0.0, dist=null, learningRate=0.001, biasLearningRate=0.001, learningRateSchedule=null, momentum=NaN, momentumSchedule=null, l1=0.0, l2=0.0, l1Bias=0.0, l2Bias=0.0, dropOut=0.0, updater=SGD, rho=NaN, epsilon=NaN, rmsDecay=NaN, adamMeanDecay=NaN, adamVarDecay=NaN, gradientNormalization=None, gradientNormalizationThreshold=1.0), nIn=200, nOut=150), lossFunction=KL_DIVERGENCE, visibleBiasInit=0.0, preTrainIterations=1), hiddenUnit=BINARY, visibleUnit=BINARY, k=1, sparsity=0.0), leakyreluAlpha=0.0, miniBatch=true, numIterations=50, maxNumLineSearchIterations=5, seed=123, optimizationAlgo=LINE_GRADIENT_DESCENT, variables=[W, b, vb], stepFunction=null, useRegularization=false, useDropConnect=false, minimize=true, learningRateByParam={b=0.001, W=0.001, vb=0.001}, l1ByParam={b=0.0, W=0.0, vb=0.0}, l2ByParam={b=0.0, W=0.0, vb=0.0}, learningRatePolicy=None, lrPolicyDecayRate=NaN, lrPolicySteps=NaN, lrPolicyPower=NaN, pretrain=true, iterationCount=0)
> NeuralNetConfiguration(layer=RBM(super=BasePretrainNetwork(super=FeedForwardLayer(super=Layer(layerName=layer1, activationFn=relu, weightInit=XAVIER, biasInit=0.0, dist=null, learningRate=0.001, biasLearningRate=0.001, learningRateSchedule=null, momentum=NaN, momentumSchedule=null, l1=0.0, l2=0.0, l1Bias=0.0, l2Bias=0.0, dropOut=0.0, updater=SGD, rho=NaN, epsilon=NaN, rmsDecay=NaN, adamMeanDecay=NaN, adamVarDecay=NaN, gradientNormalization=None, gradientNormalizationThreshold=1.0), nIn=150, nOut=100), lossFunction=KL_DIVERGENCE, visibleBiasInit=0.0, preTrainIterations=1), hiddenUnit=BINARY, visibleUnit=BINARY, k=1, sparsity=0.0), leakyreluAlpha=0.0, miniBatch=true, numIterations=50, maxNumLineSearchIterations=5, seed=123, optimizationAlgo=LINE_GRADIENT_DESCENT, variables=[W, b, vb], stepFunction=null, useRegularization=false, useDropConnect=false, minimize=true, learningRateByParam={b=0.001, W=0.001, vb=0.001}, l1ByParam={b=0.0, W=0.0, vb=0.0}, l2ByParam={b=0.0, W=0.0, vb=0.0}, learningRatePolicy=None, lrPolicyDecayRate=NaN, lrPolicySteps=NaN, lrPolicyPower=NaN, pretrain=true, iterationCount=0)
> NeuralNetConfiguration(layer=RBM(super=BasePretrainNetwork(super=FeedForwardLayer(super=Layer(layerName=layer2, activationFn=relu, weightInit=XAVIER, biasInit=0.0, dist=null, learningRate=0.001, biasLearningRate=0.001, learningRateSchedule=null, momentum=NaN, momentumSchedule=null, l1=0.0, l2=0.0, l1Bias=0.0, l2Bias=0.0, dropOut=0.0, updater=SGD, rho=NaN, epsilon=NaN, rmsDecay=NaN, adamMeanDecay=NaN, adamVarDecay=NaN, gradientNormalization=None, gradientNormalizationThreshold=1.0), nIn=100, nOut=150), lossFunction=KL_DIVERGENCE, visibleBiasInit=0.0, preTrainIterations=1), hiddenUnit=BINARY, visibleUnit=BINARY, k=1, sparsity=0.0), leakyreluAlpha=0.0, miniBatch=true, numIterations=50, maxNumLineSearchIterations=5, seed=123, optimizationAlgo=LINE_GRADIENT_DESCENT, variables=[W, b, vb], stepFunction=null, useRegularization=false, useDropConnect=false, minimize=true, learningRateByParam={b=0.001, W=0.001, vb=0.001}, l1ByParam={b=0.0, W=0.0, vb=0.0}, l2ByParam={b=0.0, W=0.0, vb=0.0}, learningRatePolicy=None, lrPolicyDecayRate=NaN, lrPolicySteps=NaN, lrPolicyPower=NaN, pretrain=true, iterationCount=0)
> NeuralNetConfiguration(layer=OutputLayer(super=BaseOutputLayer(super=FeedForwardLayer(super=Layer(layerName=layer3, activationFn=relu, weightInit=XAVIER, biasInit=0.0, dist=null, learningRate=0.001, biasLearningRate=0.001, learningRateSchedule=null, momentum=NaN, momentumSchedule=null, l1=0.0, l2=0.0, l1Bias=0.0, l2Bias=0.0, dropOut=0.0, updater=SGD, rho=NaN, epsilon=NaN, rmsDecay=NaN, adamMeanDecay=NaN, adamVarDecay=NaN, gradientNormalization=None, gradientNormalizationThreshold=1.0), nIn=150, nOut=200), lossFn=LossMSE())), leakyreluAlpha=0.0, miniBatch=true, numIterations=50, maxNumLineSearchIterations=5, seed=123, optimizationAlgo=LINE_GRADIENT_DESCENT, variables=[W, b], stepFunction=null, useRegularization=false, useDropConnect=false, minimize=true, learningRateByParam={b=0.001, W=0.001}, l1ByParam={b=0.0, W=0.0}, l2ByParam={b=0.0, W=0.0}, learningRatePolicy=None, lrPolicyDecayRate=NaN, lrPolicySteps=NaN, lrPolicyPower=NaN, pretrain=false, iterationCount=0)
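The per-layer outputs below were printed with something along these lines (a minimal sketch, not the exact code; `input` here is a hypothetical 1 x 200 INDArray holding one example):

import java.util.List;
import org.nd4j.linalg.api.ndarray.INDArray;

// Sketch: print the activations of every layer for a single example.
// 'input' is a hypothetical 1 x 200 INDArray; 'model' is the network built above.
model.init();                                           // initialize parameters before use
List<INDArray> activations = model.feedForward(input);  // index 0 = input, last = output layer
for (int i = 0; i < activations.size(); i++) {
    System.out.println("layer " + i + ": " + activations.get(i));
}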
The output of each layer:
input layer: [100.77, 114.58, 115.04, 120.97, 122.08, 125.99, 129.05, 130.05, 133.98, 135.00, 141.02, 141.27, 142.94, 142.95, 144.06, 145.56, 152.03, 157.08, 172.06, 175.46, 185.04, 190.67, 203.05, 204.33, 215.02, 215.57, 217.07, 224.26, 239.16, 268.82, 274.93, 283.23, 316.76, 325.57, 353.85, 365.11, 366.11, 412.84, 584.85, 626.83, 632.70, 653.72, 655.95, 862.54, 878.53, 909.94, 1,000.67, 1,494.69, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00]
1st layer: [1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 0.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 0.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00]
2nd layer: [0.99, 0.99, 0.98, 0.98, 0.92, 0.97, 0.98, 0.99, 0.99, 0.99, 0.99, 0.99, 0.96, 0.99, 1.00, 1.00, 1.00, 0.96, 0.98, 0.98, 0.98, 0.99, 0.98, 0.98, 0.97, 0.99, 0.99, 0.97, 0.99, 0.99, 1.00, 0.99, 0.96, 0.97, 0.94, 0.99, 0.97, 0.99, 0.97, 0.99, 0.99, 0.98, 0.99, 0.97, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.99, 0.98, 0.98, 0.99, 0.94, 1.00, 0.99, 0.98, 0.99, 0.99, 0.93, 0.95, 0.98, 0.87, 0.99, 0.99, 0.98, 0.98, 0.99, 0.99, 0.99, 1.00, 0.99, 0.98, 1.00, 0.98, 0.98, 0.97, 0.98, 0.99, 1.00, 1.00, 0.98, 1.00, 0.98, 0.99, 0.98, 0.98, 0.99, 0.96, 0.99, 0.96, 0.99, 0.98, 0.99, 0.98, 0.99]
3rd layer: [1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 1.00]
output layer: [0.00, 57.55, 60.74, 64.26, 0.00, 0.00, 0.00, 78.81, 82.78, 0.00, 89.25, 97.19, 102.29, 0.00, 113.66, 0.00, 0.00, 0.00, 143.83, 152.64, 163.06, 0.00, 185.16, 0.00, 0.00, 0.00, 238.34, 250.02, 0.00, 271.38, 274.44, 278.98, 0.00, 0.00, 0.00, 267.09, 262.82, 243.22, 232.50, 226.24, 213.34, 202.92, 187.23, 0.00, 158.19, 0.00, 140.41, 0.00, 112.42, 0.00, 102.14, 99.55, 0.00, 0.00, 86.44, 71.89, 71.72, 67.41, 0.00, 0.00, 61.82, 59.30, 55.83, 57.85, 0.00, 55.88, 0.00, 50.60, 0.00, 0.00, 32.82, 44.75, 43.48, 0.00, 0.00, 0.00, 0.00, 39.59, 36.48, 0.00, 32.91, 33.21, 0.00, 0.00, 34.55, 33.58, 0.00, 31.78, 28.79, 0.00, 0.00, 0.00, 0.00, 0.00, 26.47, 25.76, 25.15, 0.00, 0.00, 0.00, 22.14, 23.24, 23.87, 0.00, 21.50, 21.14, 0.00, 20.81, 20.28, 18.40, 0.00, 17.48, 0.00, 17.36, 0.00, 16.28, 16.22, 16.20, 13.52, 0.00, 0.00, 13.68, 14.14, 0.00, 13.91, 13.99, 0.00, 13.77, 13.27, 0.00, 12.81, 12.95, 12.50, 12.00, 12.12, 0.00, 12.43, 12.04, 0.00, 12.38, 12.16, 12.26, 12.65, 0.00, 12.32, 12.88, 12.43, 12.09, 0.00, 0.00, 11.94, 0.00, 12.66, 12.55, 0.00, 0.00, 12.26, 11.84, 11.43, 11.57, 0.00, 0.00, 11.93, 12.14, 12.99, 13.04, 12.53, 13.38, 0.00, 0.00, 13.64, 0.00, 0.00, 13.23, 0.00, 13.72, 0.00, 0.00, 15.13, 6.50, 0.00, 13.89, 0.00, 14.86, 0.00, 13.98, 14.58, 0.00, 14.74, 0.00, 14.90, 15.53, 15.77, 16.12, 0.00, 16.85, 17.68, 0.00, 19.60, 0.00]
I used the ReLU activation function for every unit in the network. However, when I print the output of each layer, all of the hidden layers give values between 0 and 1.
I don't understand what is going wrong. Should I use Sigmoid instead of ReLU and normalize the input?
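If normalizing is the right fix, this is roughly what I would try (a sketch, assuming the 200 raw integer inputs are in an INDArray named `features`; for an autoencoder the labels are the inputs themselves):

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;

// 'features' is a hypothetical nExamples x 200 INDArray of the raw integer inputs.
DataSet data = new DataSet(features, features);                   // autoencoder: target == input

NormalizerMinMaxScaler scaler = new NormalizerMinMaxScaler(0, 1); // rescale each column into [0, 1]
scaler.fit(data);        // compute per-column min/max from the data
scaler.transform(data);  // apply the scaling in place

Would this be the right approach?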