任何帮助将不胜感激,下面是我的代码,我收到了上述错误。我遵循我在网上找到的一些代码片段。不太清楚如何处理错误。我正在尝试创建一个使用 BERT 作为上游的模型,然后在被 softmax 分类之前注意输入 BiLSTM。
%%time
import torch
import torch.nn as nn
from transformers import BertModel
from torch.nn import Parameter
def new_parameter(*size):
out = Parameter(torch.FloatTensor(*size))
torch.nn.init.xavier_normal(out)
return out
class Attention(nn.Module):
def __init__(self, attention_size):
super(Attention, self).__init__()
self.attention = new_parameter(attention_size, 1)
def forward(self, x_in):
# after this, we have (batch, dim1) with a diff weight per each cell
attention_score = torch.matmul(x_in, self.attention).squeeze()
attention_score = F.softmax(attention_score).view(x_in.size(0), x_in.size(1), 1)
scored_x = x_in * attention_score
# now, sum across dim 1 to get the expected feature vector
condensed_x = torch.sum(scored_x, dim=1)
return condensed_x
class Net(nn.Module):
def __init__(self, freeze_bert=False):
super(Net, self).__init__()
self.bert = BertModel.from_pretrained('bert-base-uncased')
self.lstm=nn.LSTM(input_size=768, hidden_size=384, num_layers=2, dropout=.5,
bidirectional=True, batch_first=True)
self.attention=Attention(768)
self.classifier=nn.Linear(768,2)
#self.softmax = nn.Softmax()
# Freeze the BERT model
if freeze_bert:
for param in self.bert.parameters():
param.requires_grad = False
def forward(self, input_ids, attention_mask):
sequence_output, pooled_output=self.bert(input_ids=input_ids, attention_mask=attention_mask)
x,(h,c)=self.lstm(sequence_output.unsqueeze(0))
x=self.attention(x.view(x.shape[1],1,768))
logits=self.classifier(x)
#x = self.softmax(x)
return logits