bert-language-model - RuntimeError：输入必须有 3 个维度，得到 4

Question

任何帮助将不胜感激，下面是我的代码，我收到了上述错误。我遵循我在网上找到的一些代码片段。不太清楚如何处理错误。我正在尝试创建一个使用 BERT 作为上游的模型，然后在被 softmax 分类之前注意输入 BiLSTM。

%%time
import torch
import torch.nn as nn
from transformers import BertModel
from torch.nn import Parameter

def new_parameter(*size):
    out = Parameter(torch.FloatTensor(*size))
    torch.nn.init.xavier_normal(out)
    return out

class Attention(nn.Module):
    def __init__(self, attention_size):
        super(Attention, self).__init__()
        self.attention = new_parameter(attention_size, 1)
   def forward(self, x_in):
       # after this, we have (batch, dim1) with a diff weight per each cell
       attention_score = torch.matmul(x_in, self.attention).squeeze()
       attention_score = F.softmax(attention_score).view(x_in.size(0), x_in.size(1), 1)
       scored_x = x_in * attention_score
       # now, sum across dim 1 to get the expected feature vector
       condensed_x = torch.sum(scored_x, dim=1)
       return condensed_x

class Net(nn.Module):
    def __init__(self, freeze_bert=False):
        super(Net, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.lstm=nn.LSTM(input_size=768, hidden_size=384, num_layers=2, dropout=.5, 
                          bidirectional=True, batch_first=True)
        self.attention=Attention(768)
        self.classifier=nn.Linear(768,2)
        #self.softmax = nn.Softmax()
        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
            
   def forward(self, input_ids, attention_mask):
       sequence_output, pooled_output=self.bert(input_ids=input_ids, attention_mask=attention_mask)
       x,(h,c)=self.lstm(sequence_output.unsqueeze(0))
       x=self.attention(x.view(x.shape[1],1,768))
       logits=self.classifier(x)
       #x = self.softmax(x)
       return logits

这是完整错误的图像：

bert-language-model - RuntimeError：输入必须有 3 个维度，得到 4

0 回答 0

Related

Reference