I am training on a fingerprint dataset for minutiae detection. My input is a rescaled (128 x 128) slap fingerprint image, and the ground truth is a 12-channel minutiae map: each channel represents one bin of minutiae direction, with a point marked at the (x, y) location of each minutia. Take the input and GT channels (3 and 8) as an example.
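For context, this is roughly how I build such a ground-truth map (the helper name and the choice of 12 bins over 360 degrees are my own encoding, not a standard convention):

import numpy as np

def build_minutiae_map(minutiae, size=128, n_bins=12):
    """Rasterize a list of (x, y, angle_deg) minutiae into an
    (n_bins, size, size) map: one channel per direction bin,
    a single hot pixel at each minutia location."""
    gt = np.zeros((n_bins, size, size), dtype=np.float32)
    for x, y, angle in minutiae:
        b = int(angle % 360 // (360 / n_bins))  # direction bin index
        gt[b, int(y), int(x)] = 1.0
    return gt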
The input image is normalized to the range [-1, 1] to match the tanh output of the model.
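The normalization itself is just a linear rescale (a minimal sketch, assuming 8-bit grayscale input):

import numpy as np

def normalize_img(img_uint8):
    """Map [0, 255] pixel values to [-1, 1] to match the tanh output range."""
    return img_uint8.astype(np.float32) / 127.5 - 1.0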
The problem is that the intermediate prediction-layer outputs I am getting (shown below) are uninterpretable.
Here is my simple autoencoder model:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AEC(nn.Module):
    def __init__(self, in_ch=3, out_ch=12):
        super(AEC, self).__init__()
        print('in_ch=', in_ch)
        print('out_ch=', out_ch)
        # Encoder: each 2x2 stride-2 conv halves the spatial size (128 -> 8)
        self.conv1 = nn.Conv2d(in_ch, 16, 2, stride=2)
        self.conv2 = nn.Conv2d(16, 32, 2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, 2, stride=2)
        self.conv4 = nn.Conv2d(64, 128, 2, stride=2)
        # Decoder: each 2x2 stride-2 transposed conv doubles it back (8 -> 128)
        self.t_conv1 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(64, 32, 2, stride=2)
        self.t_conv3 = nn.ConvTranspose2d(32, 16, 2, stride=2)
        self.t_conv4 = nn.ConvTranspose2d(16, out_ch, 2, stride=2)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.t_conv1(x))
        x = F.relu(self.t_conv2(x))
        x = F.relu(self.t_conv3(x))
        x = torch.tanh(self.t_conv4(x))  # F.tanh is deprecated; output in [-1, 1]
        return x
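A quick sanity check of the shapes (my own snippet, not part of the training script):

model = AEC(in_ch=3, out_ch=12)
out = model(torch.randn(1, 3, 128, 128))
print(out.shape)  # torch.Size([1, 12, 128, 128]), values in [-1, 1]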
The code used for training and for viewing the intermediate outputs in TensorBoard:
for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
    ite_num = ite_num + 1
    ite_num4val = ite_num4val + 1
    inputs, minmap = data['image'], data['minmap']
    inputs = inputs.float()
    minmap = minmap.float()
    # move to GPU if available (Variable wrapping is a no-op since PyTorch 0.4, so it is dropped)
    if torch.cuda.is_available():
        inputs_v, minmap = inputs.cuda(), minmap.cuda()
    else:
        inputs_v = inputs

    optimizer_min.zero_grad()
    d0_min = min_net(inputs_v)

    # calculate loss and update weights
    min_each_loss = l2_loss_fusion(d0_min, minmap)
    min_each_loss.backward()
    optimizer_min.step()
    running_loss_min += min_each_loss.item()

    if ite_num % save_tensorboard == 0:
        writer.add_scalars('Loss_Statistics', {'Loss_total_min': min_each_loss.item()},
                           global_step=ite_num)
        grid_image = make_grid(inputs_v[0].clone().cpu().data, 1, normalize=True)
        writer.add_image('input', grid_image, ite_num)
        # log channels 3 and 8 of both the prediction and the ground truth:
        # sigmoid, then min-max normalize each channel to [0, 1] for display
        for tag, t in (('Pred_layer_min', d0_min[0]), ('Ori_layer_min', minmap[0])):
            for ch in (3, 8):
                res = t.clone().sigmoid().data.cpu().numpy()[ch, :, :]
                res = (res - res.min()) / (res.max() - res.min() + 1e-8)
                writer.add_image('%s_channel%d' % (tag, ch), torch.tensor(res), ite_num,
                                 dataformats='HW')

    del d0_min, min_each_loss

    if ite_num % save_frq == 0:
        torch.save(min_net.state_dict(), model_dir + model_name +
                   "_min_bce_itr_%d_train_tar_%3f.pth" % (ite_num, running_loss_min / ite_num4val))
        running_loss_min = 0.0
        min_net.train()  # resume training
        ite_num4val = 0
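One thing I am unsure about: the network ends in tanh, so its outputs are already in [-1, 1], yet I squash them again with sigmoid before display (and I apply sigmoid to the ground truth too, even though it is a 0/1 map). The alternative I have been considering is a plain linear rescale (my own sketch, not what the script above does):

def to_display(t, ch):
    """Map one tanh output channel from [-1, 1] to [0, 1] for TensorBoard,
    instead of passing it through a second squashing function."""
    res = t.detach().cpu()[ch] * 0.5 + 0.5
    return res.clamp(0.0, 1.0)

# e.g. writer.add_image('Pred_layer_min_channel3', to_display(d0_min[0], 3),
#                       ite_num, dataformats='HW')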
Please help me figure out where I am going wrong. Is there something wrong with the way I am visualizing the outputs on TensorBoard?
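For completeness: I have not posted my exact l2_loss_fusion, but a minimal stand-in consistent with how it is called above would be (my actual loss may weight the channels differently):

import torch.nn.functional as F

def l2_loss_fusion(pred, target):
    # stand-in: per-pixel mean squared error over all 12 channels
    return F.mse_loss(pred, target)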