image-processing - 每次我在数据集上运行 caffe 模型时，损失保持不变

Question

每次运行 caffe 时，我的模型在图像数据集上计算出的初始损失都保持不变。这种行为是不是很奇怪？毕竟每次运行模型时，初始损失应该不同（至少略有不同）。

如果我在损失层中使用 SoftmaxWithLoss，损失值在后续迭代中保持不变。此外，如果我在反卷积层中设置 lr_mult=0，损失在各次迭代中也保持恒定。如果我设置 lr_mult=1，损失值会发生变化，但在几千次迭代后最终又回到初始损失值。

在这方面有什么想法吗？

非常感谢在这方面的任何帮助。

以下是我的求解器文件：

# Caffe solver settings for the Conv-Deconv-Net experiment.
# Test, display and snapshot cadence are all expressed in training iterations.
test_iter: 100
test_interval: 100 # py solving tests
display: 100
#average_loss: 100
# The learning rate stays at base_lr for the whole run ("fixed" policy).
# NOTE(review): base_lr is 1e-7; whether that is appropriate depends on how the
# loss is normalised in the net's loss layer - tune the two together.
lr_policy: "fixed"
base_lr: 0.0000001
momentum: 0.5
iter_size: 1
# Alternative settings that were tried and left disabled:
# base_lr: 1e-9
# momentum: 0.99
# iter_size: 1
max_iter: 1000000
# Weight decay is disabled here, so no weight_decay value is set by this file.
#weight_decay: 0.0005
snapshot: 1000
# NOTE(review): presumably also stores gradient (diff) blobs in each snapshot,
# which makes snapshot files larger - confirm against caffe.proto.
snapshot_diff: true 
#test_initialization: false
solver_mode: GPU

以下是我的卷积神经网络的架构。

# Input 128 640 3
# Conv1 (kernel=3) 126 638 64
# ReLU
# Conv2 (kernel=3) 124 636 128
# ReLU
# Conv3 (kernel=3) 122 634 128
# ReLU
# Pool1 (kernel=2) 61 317 128
# Conv4 (kernel=4) 58 314 128
# ReLU
# Conv5 (kernel=4) 55 311 256
# ReLU
# Conv6 (kernel=4) 52 308 128
# ReLU
# Pool2 (kernel=2) 26 154 128
# Conv7 (kernel=4,stride=2,pad=3) 15 79 5
# ReLU
# Decon1 (kernel=16,stride=8,pad=0) 128 640 5
# ReLU
# Loss

name: "Conv-Deconv-Net"
# Training-phase image input: reads batches of 4 entries from the training
# image LMDB and exposes them as the "data" blob.
# NOTE(review): labels are read by a separate "label" layer from a different
# LMDB; this presumably relies on both databases storing entries in the same
# order - verify.
layer {
  name: "data"
  type: "Data"
  top: "data"
  include {
    phase: TRAIN
  }
  data_param {
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_IMG_LMDB_olpywm"
    batch_size: 4
    backend: LMDB
  }
}
# Training-phase label input: reads batches of 4 entries from the training
# label LMDB and exposes them as the "label" blob consumed by the loss layer.
# The batch size matches the training "data" layer (4).
layer {
  name: "label"
  type: "Data"
  top: "label"
  include {
    phase: TRAIN
  }
  data_param {
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_LBL_LMDB_olpywm"
    batch_size: 4
    backend: LMDB
  }
}
# Test-phase image input: same role as the training "data" layer, but reads
# from the testing image LMDB. Only included when the net runs in TEST phase.
layer {
  name: "data"
  type: "Data"
  top: "data"
  include {
    phase: TEST
  }
  data_param {
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_IMG_LMDB_olpywm"
    batch_size: 4
    backend: LMDB
  }
}
# Test-phase label input: reads batches of 4 entries from the testing label
# LMDB and exposes them as the "label" blob. Only included in TEST phase.
layer {
  name: "label"
  type: "Data"
  top: "label"
  include {
    phase: TEST
  }
  data_param {
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_LBL_LMDB_olpywm"
    batch_size: 4
    backend: LMDB
  }
}
# conv1: 3x3 convolution on the input image, stride 1, no padding, 64 output
# channels. Weights use Xavier initialisation.
# The two param blocks follow the usual Caffe convention (first = weights,
# second = bias): the bias learns at twice the base rate and has no decay.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
    pad: 0
    weight_filler {
      type: "xavier"
    }
    # No explicit value is given, so the constant filler's default applies
    # (presumably 0 - confirm against caffe.proto).
    bias_filler {
      type: "constant"
    }
  }
}
# relu1: ReLU on the conv1 output, written to a separate blob ("relu1")
# rather than computed in place.
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "relu1"
}
# conv2: 3x3 convolution on relu1, no padding, 128 output channels.
# Weights use Xavier initialisation; the bias starts at exactly 0.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "relu1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 3
    # stride is not set in this layer, unlike the other convolutions; the
    # framework default applies (presumably 1 - confirm against caffe.proto).
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
# relu2: ReLU on the conv2 output, written to a separate blob ("relu2").
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "relu2"
}
# conv3: 3x3 convolution on relu2, stride 1, no padding, 128 output channels.
# Weights use Xavier initialisation; the bias starts at a small positive
# constant (0.001).
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "relu2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
# relu3: ReLU on the conv3 output, written to a separate blob ("relu3").
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "relu3"
}
# pool1: 2x2 max pooling with stride 2 on relu3, i.e. non-overlapping windows
# that halve each spatial dimension.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "relu3"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

# conv4: 4x4 convolution on pool1, stride 1, no padding, 128 output channels.
# Weights use Xavier initialisation; the bias starts at 0.001.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool1"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
# relu4: ReLU on the conv4 output, written to a separate blob ("relu4").
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "relu4"
}

# conv5: 4x4 convolution on relu4, stride 1, no padding, 256 output channels
# (the widest layer in this net). Weights use Xavier initialisation; the bias
# starts at 0.001.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "relu4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
# relu5: ReLU on the conv5 output, written to a separate blob ("relu5").
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "relu5"
}

# conv6: 4x4 convolution on relu5, stride 1, no padding, reducing the channel
# count back to 128. Weights use Xavier initialisation; the bias starts at
# 0.001.
layer {
  name: "conv6"
  type: "Convolution"
  bottom: "relu5"
  top: "conv6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 4
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
# relu6: ReLU on the conv6 output, written to a separate blob ("relu6").
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "conv6"
  top: "relu6"
}

# pool2: 2x2 max pooling with stride 2 on relu6, i.e. non-overlapping windows
# that halve each spatial dimension again.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "relu6"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

# conv7: scoring layer. 4x4 convolution on pool2 with stride 2 and padding 3,
# producing 5 output channels (presumably one per class - the same channel
# count is fed, via deconv1, into the softmax loss).
layer {
  name: "conv7"
  type: "Convolution"
  bottom: "pool2"
  top: "conv7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 5
    pad: 3
    kernel_size: 4
    stride: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.001
    }
  }
}
# relu7: ReLU on the 5-channel conv7 scores, written to "relu7".
# NOTE(review): this clamps negative class scores to 0 before upsampling -
# confirm that a non-linearity on the score map is intended.
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "conv7"
  top: "relu7"
}


# deconv1: learnable upsampling of the 5-channel score map (kernel 16,
# stride 8, no padding, no bias term), initialised with bilinear
# interpolation weights and trained with lr_mult 1.
# NOTE(review): no `group` is set, so this is a full 5-in x 5-out
# deconvolution. Caffe's bilinear filler appears to write the same kernel into
# every (input, output) channel pair, which would make all 5 output channels
# identical sums of the inputs, hence a uniform softmax and an initial loss
# that is the same on every run. The filler documentation pairs "bilinear"
# with group == num_output (and typically lr_mult: 0). Confirm, and consider
# adding `group: 5`.
layer {
  name: "deconv1"
  type: "Deconvolution"
  bottom: "relu7"
  top: "deconv1"
  param {
    lr_mult: 1
  }
  convolution_param {
    num_output: 5
    bias_term: false
    kernel_size: 16
    stride: 8
    pad: 0
    weight_filler {
      type: "bilinear"
    }
  }
}
# relu8: ReLU on the upsampled scores, written to "relu8", which is the
# prediction input of the loss layer.
# NOTE(review): because of this layer only non-negative scores reach
# SoftmaxWithLoss; segmentation nets usually feed raw scores to the softmax -
# confirm this is intended.
layer {
  name: "relu8"
  type: "ReLU"
  bottom: "deconv1"
  top: "relu8"
}


#layer {
#  name: "crop"
#  type: "Crop"
#  bottom: "deconv3"
#  bottom: "data"
#  top: "score"
#}
# prob: softmax cross-entropy loss between the 5-channel "relu8" scores and
# the "label" blob; the result is exposed as "loss".
layer {
  name: "prob"
  type: "SoftmaxWithLoss"
  bottom: "relu8"
  bottom: "label"
  top: "loss"
  loss_param {
    # ignore_label: 255
    # normalize: true
    # NOTE(review): with normalize: false the loss is presumably not averaged
    # over the individual predictions, so its magnitude (and the gradient
    # scale) grows with the output size - confirm against LossParameter in
    # caffe.proto and tune base_lr accordingly.
    normalize: false
  }
}

score 0 · Accepted Answer

看来你的 base_lr 太小了。一开始先把它设为 0.00001，然后尝试把 momentum 保持在 0.9。如果学习率太小，收敛会很慢；如果太大，梯度下降会越过局部最小值（这时你会看到损失激增）。因此，你必须通过反复试验逐步找到最佳值，这里没有什么神奇的数字。

image-processing - 每次我在数据集上运行 caffe 模型时，损失保持不变

1 回答 1

Related

Reference