I have the following stripped-down model:
import numpy as np
import torch
import torch.utils.data
from torch import nn, optim
from torch.autograd import Variable
class EncoderRNN(nn.Module):
    def __init__(self, input_size=8, hidden_size=10, num_layers=2):
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        # initialize weights (note: only the first layer's weights are touched here)
        nn.init.xavier_uniform_(self.lstm.weight_ih_l0, gain=np.sqrt(2))
        nn.init.xavier_uniform_(self.lstm.weight_hh_l0, gain=np.sqrt(2))

    def forward(self, input):
        print(input.shape)
        # fresh h0/c0 created for every batch
        h0 = Variable(torch.FloatTensor(self.num_layers, input.size(0), self.hidden_size))
        c0 = Variable(torch.FloatTensor(self.num_layers, input.size(0), self.hidden_size))
        encoded_input, hidden = self.lstm(input, (h0, c0))
        encoded_input = self.sigmoid(encoded_input)
        return encoded_input
# 2000 random sequences, each of length 19 with 8 features, already in [0, 1]
train_x = torch.from_numpy(np.random.random((2000, 19, 8))).float()
train_loader = torch.utils.data.DataLoader(train_x, batch_size=64, shuffle=True)

model = EncoderRNN()
optimizer = optim.Adam(model.parameters(), lr=1e-6)
optimizer.zero_grad()
loss_function = torch.nn.BCELoss(reduction='mean')
def train(epoch):
    model.train()
    train_loss = 0
    for batch_idx, data_x in enumerate(train_loader):
        # loss/backward step stripped out of this repro; just checking the forward pass
        x = model(Variable(data_x))
        print("output has nan: " + str(np.isnan(x.detach().numpy()).any()))

train(0)
In summary, I believe I'm essentially just feeding the input into an LSTM with randomly initialized hidden values, then taking the sigmoid of that LSTM's output. I then feed that output into a decoder LSTM, take the sigmoid of the decoder's output, and use that as my final value.
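For completeness, the decoder half that I stripped out of this repro mirrors the encoder. Roughly this (a sketch from memory, deliberately keeping the same state-initialization pattern as the encoder):

class DecoderRNN(nn.Module):
    def __init__(self, input_size=10, output_size=8, num_layers=2):
        super(DecoderRNN, self).__init__()
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, output_size, num_layers, batch_first=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, encoded_input):
        # same h0/c0 pattern as the encoder
        h0 = Variable(torch.FloatTensor(self.num_layers, encoded_input.size(0), self.output_size))
        c0 = Variable(torch.FloatTensor(self.num_layers, encoded_input.size(0), self.output_size))
        decoded_output, hidden = self.lstm(encoded_input, (h0, c0))
        return self.sigmoid(decoded_output)

So the end-to-end pass is sigmoid(decoder_lstm(sigmoid(encoder_lstm(x)))).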
Unfortunately, even on the first iteration, the model often outputs a vector of the correct shape (batch_size, seq_length, seq_dim) but containing at least one, and sometimes all, NaN values. What am I doing wrong?

Thanks!
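To help localize it, I've also been attaching a forward hook that flags the first module whose output contains a NaN. A minimal sketch (report_nans is just a throwaway helper name, and it assumes torch.isnan is available, i.e. PyTorch >= 0.4):

def report_nans(module, inputs, output):
    # nn.LSTM's forward returns (output, (h_n, c_n)); inspect just the output tensor
    out = output[0] if isinstance(output, tuple) else output
    if torch.isnan(out).any():
        print("NaN in the output of: " + module.__class__.__name__)

for submodule in model.modules():
    submodule.register_forward_hook(report_nans)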
What I've tried so far:
- Changing the LSTM to a GRU (see the sketch after this list)
- Changing the sigmoid to a ReLU
- Varying the dimensionality of the hidden representation
- Passing the failing inputs to the encoder
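For reference, the GRU variant was a drop-in swap, roughly this (a sketch; EncoderGRU is just my name for it, and note the GRU takes a single h0 rather than an (h0, c0) pair):

class EncoderGRU(nn.Module):
    def __init__(self, input_size=8, hidden_size=10, num_layers=2):
        super(EncoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input):
        # the GRU has no cell state, so only h0 is needed
        h0 = Variable(torch.FloatTensor(self.num_layers, input.size(0), self.hidden_size))
        encoded_input, hidden = self.gru(input, h0)
        return self.sigmoid(encoded_input)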
Edit: Apologies to everyone who tried to help while my code was broken; I really value your time, and thank you very much for your help!