0

我是 PyTorch 的新手,我正在尝试构建一个 BiLSTM 模型以将其输出插入到 MaxPool1d 层和 AvgPool1d 层中,然后将两个层的输出连接起来以进行二进制分类任务。我正在使用预训练的 Word2Vec 嵌入作为输入:

    import torch.nn as nn
    from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
    from torch.autograd import Variable
    import torch.nn.functional as F
    
    class LSTM(nn.Module):
    
        # define all the layers used in model
        def __init__(self, vocab_size, embedding_dim, hidden_dim , num_classes, lstm_layers, weights):
            super().__init__()
            self.embedding = nn.Embedding(vocab_size, embedding_dim)
            self.embedding.weight.data.copy_(torch.from_numpy(weights))
            self.embedding.weight.requires_grad = False 
            self.lstm = nn.LSTM(embedding_dim,
                                lstm_units,
                                num_layers=lstm_layers,
                                bidirectional=True,
                                batch_first=True)
            num_directions = 2 #if bidirectional else 1
            self.m1 = nn.MaxPool1d(1,stride= 1)
            self.m2 = nn.AvgPool1d(1,stride= 1)
            self.fc1 = nn.Linear(lstm_units * num_directions, hidden_dim)
            self.fc2 = nn.Linear(hidden_dim, num_classes)
            self.relu = nn.ReLU()
            self.softmax = nn.Softmax() 
            self.lstm_layers = lstm_layers
            self.num_directions = num_directions
            self.lstm_units = lstm_units
    
        def forward(self, text, text_lengths):
            batch_size = text.shape[0]
            h_0, c_0 = (Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)),
                        Variable(torch.zeros(self.lstm_layers * self.num_directions, batch_size, self.lstm_units)))
    
            embedded = self.embedding(text)
            packed_embedded = pack_padded_sequence(embedded, text_lengths.to("cpu"), batch_first=True)
            output, (h_n, c_n) = self.lstm(packed_embedded, (h_0, c_0))
            output_unpacked, output_lengths = pad_packed_sequence(output, batch_first=True, enforce_sorted=False)
            # out = output_unpacked[:, -1, :]
            #return self.linear(ht[-1])
            out = output_unpacked
    
            out1 = self.m1(out)
            out2 = self.m2(out)
            out = torch.cat((out1, out2), 1)
   
            out = F.relu(self.fc1(out))
            preds = F.softmax(self.fc2(out))
    
            return preds

我的训练功能如下:

    import time
    
    def train(dataloader):
        model.train()
        total_acc, total_count = 0, 0
        log_interval = 500
        text_lengths = np.dtype('int64').type(200)
        start_time = time.time()
    
        for idx, (label, text) in enumerate(dataloader):
            optimizer.zero_grad()
            predited_label = model(text, text_lengths = torch.tensor([text_lengths]))
            loss = criterion(predited_label, label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            total_acc += (predited_label.argmax(1) == label).sum().item()
            total_count += label.size(0)
            if idx % log_interval == 0 and idx > 0:
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches '
                      '| accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
                                                  total_acc/total_count))
                total_acc, total_count = 0, 0
                start_time = time.time()
    
    def evaluate(dataloader):
        model.eval()
        total_acc, total_count = 0, 0
    
        with torch.no_grad():
            for idx, (label, text) in enumerate(dataloader):
                predited_label = model(text)
                loss = criterion(predited_label, label)
                total_acc += (predited_label.argmax(1) == label).sum().item()
                total_count += label.size(0)
        return total_acc/total_count

我尝试像这样运行代码:

    from torch.utils.data import DataLoader
   
    # Hyperparameters
    EPOCHS = 1 # epoch
    LR =1  # learning rate
    BATCH_SIZE = 1 # batch size for training
    
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=LR)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1)
    total_accu = None
  
    
    train_dataloader = DataLoader(train_data, batch_size=BATCH_SIZE,
                                  shuffle=True, collate_fn=collate_batch)
    test_dataloader = DataLoader(test_data, batch_size=BATCH_SIZE,
                                 shuffle=True, collate_fn=collate_batch)
    
    for epoch in range(1, EPOCHS + 1):
        epoch_start_time = time.time()
        train(train_dataloader)
        accu_val = evaluate(valid_dataloader)
        if total_accu is not None and total_accu > accu_val:
          scheduler.step()
        else:
           total_accu = accu_val
        print('-' * 59)
        print('| end of epoch {:3d} | time: {:5.2f}s | '
              'valid accuracy {:8.3f} '.format(epoch,
                                               time.time() - epoch_start_time,
                                               accu_val))
        print('-' * 59)

但是,我收到以下错误。我不确定这里的输入大小指的是什么,而且我在任何地方都找不到其他有相同错误的人。谁能给我建议?

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-111-14ba1dd26348> in <module>()
     27 for epoch in range(1, EPOCHS + 1):
     28     epoch_start_time = time.time()
---> 29     train(train_dataloader)
     30     accu_val = evaluate(valid_dataloader)
     31     if total_accu is not None and total_accu > accu_val:

6 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
    201             raise RuntimeError(
    202                 'input must have {} dimensions, got {}'.format(
--> 203                     expected_input_dim, input.dim()))
    204         if self.input_size != input.size(-1):
    205             raise RuntimeError(

RuntimeError: input must have 2 dimensions, got 1
4

0 回答 0