被采纳的答案在处理张量尺寸时有一处小错误:使用 padding = "VALID" 时输出的空间尺寸会发生变化,而原答案却把它们当作 padding = "SAME" 的情形来处理。因此在一般情况下,这种尺寸不匹配会导致代码崩溃。下面附上在其基础上修正后的代码,两种填充方式都能得到正确处理。
# Per-sample convolution: each example in the minibatch is convolved with its
# own filter bank. The batch axis is folded into the channel axis so that one
# depthwise convolution handles every (sample, channel) pair independently;
# the result is then unfolded and summed over the input channels.
inp = tf.placeholder(tf.float32, [MB, H, W, channels_img])

# F has shape (MB, fh, fw, channels, out_channels)
# REM: with the notation in the question, we need: channels_img == channels
# Move the batch axis next to the channel axis, then merge the two so that the
# filter's "in_channels" dimension enumerates (sample, channel) pairs.
F = tf.transpose(F, [1, 2, 0, 3, 4])  # shape (fh, fw, MB, channels, out_channels)
F = tf.reshape(F, [fh, fw, channels * MB, out_channels])

# Fold the batch into the channel axis of the input with the same
# (sample-major, channel-minor) ordering that was used for the filter.
inp_r = tf.transpose(inp, [1, 2, 0, 3])  # shape (H, W, MB, channels_img)
inp_r = tf.reshape(inp_r, [1, H, W, MB * channels_img])

padding = "VALID"  # or "SAME"

# With unit strides the spatial size of the output depends only on the padding
# mode; compute it up front so the reshape below always matches the conv.
if padding == "SAME":
    out_h, out_w = H, W
elif padding == "VALID":
    out_h, out_w = H - fh + 1, W - fw + 1
else:
    raise ValueError(
        "padding must be 'SAME' or 'VALID', got {!r}".format(padding))

out = tf.nn.depthwise_conv2d(
    inp_r,
    filter=F,
    strides=[1, 1, 1, 1],
    padding=padding)
# Now out shape is (1, out_h, out_w, MB*channels*out_channels).

# Unfold the merged channel axis back into (MB, channels, out_channels).
out = tf.reshape(out, [out_h, out_w, MB, channels, out_channels])
out = tf.transpose(out, [2, 0, 1, 3, 4])  # shape (MB, out_h, out_w, channels, out_channels)
# Summing over the input channels turns the depthwise result into an ordinary
# convolution output for each sample.
out = tf.reduce_sum(out, axis=3)
# out shape is now (MB, out_h, out_w, out_channels)