
I am trying to complete an assignment and write a simple RNN. Here is the class:

class RNNBaseline(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers,
                 bidirectional, dropout, pad_idx):

        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)

        self.rnn = nn.GRU(input_size=embedding_dim, hidden_size=hidden_dim) #RNN(embedding_dim, hidden_dim)

        self.fc = nn.Linear(hidden_dim, output_dim)  # YOUR CODE GOES HERE

        self.dropout = nn.Dropout(dropout)


    def forward(self, text, text_lengths, hidden = None):

        #text = [sent len, batch size]

        embedded = self.embedding(text)

        #embedded = [sent len, batch size, emb dim]

        #pack sequence
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)

        # cell arg for LSTM, remove for GRU
        # packed_output, (hidden, cell) = self.rnn(packed_embedded)
        # unpack sequence
        # output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)

        #output = [sent len, batch size, hid dim * num directions]
        #output over padding tokens are zero tensors

        #hidden = [num layers * num directions, batch size, hid dim]
        #cell = [num layers * num directions, batch size, hid dim]

        #concat the final forward (hidden[-2,:,:]) and backward (hidden[-1,:,:]) hidden layers
        #and apply dropout
        output, hidden = self.rnn(packed_embedded, hidden)
        #hidden = None  # concatenate

        #hidden = [batch size, hid dim * num directions] or [batch_size, hid dim * num directions]

        return self.fc(hidden)

For now I am not using an LSTM or trying to make the RNN bidirectional; I just want a plain GRU that trains without errors. Here is the training function:

import numpy as np

min_loss = np.inf

cur_patience = 0

for epoch in range(1, max_epochs + 1):
    train_loss = 0.0
    model.train()
    pbar = tqdm(enumerate(train_iter), total=len(train_iter), leave=False)
    pbar.set_description(f"Epoch {epoch}")
    for it, ((text, txt_len), label) in pbar:
        #YOUR CODE GOES HERE
        opt.zero_grad()
        input = text.to(device)
        labels = label.to(device)
        output = model(input, txt_len.type(torch.int64).cpu())
        train_loss = loss_func(output, labels)
        train_loss.backward()
        opt.step()

    train_loss /= len(train_iter)
    val_loss = 0.0
    model.eval()
    pbar = tqdm(enumerate(valid_iter), total=len(valid_iter), leave=False)
    pbar.set_description(f"Epoch {epoch}")
    for it, ((text, txt_len), label) in pbar:
        # YOUR CODE GOES HERE
        input = text.to(device)
        labels = label.to(device)
        output = model(input, txt_len.type(torch.int64).cpu())
        val_loss = loss_func(output, labels)

    val_loss /= len(valid_iter)
    if val_loss < min_loss:
        min_loss = val_loss
        best_model = model.state_dict()
    else:
        cur_patience += 1
        if cur_patience == patience:
            cur_patience = 0
            break

    print('Epoch: {}, Training Loss: {}, Validation Loss: {}'.format(epoch, train_loss, val_loss))

model.load_state_dict(best_model)

And some variables:

vocab_size = len(TEXT.vocab)
emb_dim = 100
hidden_dim = 256
output_dim = 1
n_layers = 2
bidirectional = False
dropout = 0.2
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
patience=3

opt = torch.optim.Adam(model.parameters())
loss_func = nn.BCEWithLogitsLoss()

max_epochs = 1

But I get this error:

ValueError: Target size (torch.Size([64])) must be the same as input size (torch.Size([1, 64, 1]))

...on this line:

---> 18 train_loss = loss_func(output, labels)

What exactly am I doing wrong?


1 Answer


nn.BCEWithLogitsLoss expects its outputs and targets (or labels, in your case) to be of size [b, d], where b is the batch size and d is the number of classes (or the dimension of whatever you are predicting). Currently, your output is of size [b, d, 1] and your target is of size [d]. Two fixes are necessary, and both are very simple (a short shape sketch follows the list below):

  1. Add a batch dimension to your targets (labels). This is a common mistake when working with a dataset that returns single data elements, because a dataset generally does not add the batch dimension. Wrap your dataset class in a pytorch dataloader, or, if you don't want to do that, simply add an unsqueeze() operation. Note that the unsqueeze approach only works when the batch size is 1; otherwise using a dataloader is probably the better option.

  2. Your output has an extra third dimension of size 1, which can easily be flattened with a squeeze() operation. Both unsqueeze and squeeze are differentiable, so neither should cause problems for backpropagation.
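
To make the shape mismatch concrete, here is a minimal, self-contained sketch with made-up random tensors (not the question's actual data) that reproduces the error and then applies the two operations described above:

import torch
import torch.nn as nn

loss_func = nn.BCEWithLogitsLoss()

logits = torch.randn(1, 64, 1)                # what the model currently returns: [1, batch, 1]
targets = torch.randint(0, 2, (64,)).float()  # what the labels look like: [batch]

# loss_func(logits, targets)   # raises ValueError: target size must match input size

loss = loss_func(logits.squeeze(-1),       # drop the trailing size-1 dim -> [1, batch]
                 targets.unsqueeze(0))     # add a leading dim            -> [1, batch]
print(loss)

With the same shapes on both sides, the loss is computed without error. In your training loop the equivalent changes look like this: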

... code before here

for it, ((text, txt_len), label) in pbar:
    # YOUR CODE GOES HERE
    input = text.to(device)
    labels = label.to(device).unsqueeze(0)                 # added unsqueeze operation
    output = model(input, txt_len.type(torch.int64).cpu())
    output = output.squeeze(-1)                            # added squeeze on last dim
    val_loss = loss_func(output, labels)

... code after here
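
An alternative that was not part of the original answer (just a sketch under the same assumptions) is to squeeze the extra dimensions inside the model's forward instead, so the training loop's loss_func(output, labels) line can stay as it is (with labels as floats, which BCEWithLogitsLoss requires):

def forward(self, text, text_lengths, hidden=None):
    embedded = self.embedding(text)
    packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
    packed_output, hidden = self.rnn(packed_embedded, hidden)
    # hidden = [num layers * num directions, batch size, hid dim];
    # take the last layer and map it to one logit per example
    return self.fc(hidden[-1]).squeeze(-1)   # -> [batch size]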