I am trying to fine-tune CamemBERT (the French version of RoBERTa) for question answering.
First, I use the CamemBERT model to produce contextual embeddings of the question and the context text, with a linear output layer on top that produces the start and end logits corresponding to the beginning and end of the answer span.
In the paper's official results, question-answering performance is (88%, 77%) for (F1 score, EM), but I get (71%, 46%).
My question is: why are my results so far from the official ones?
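(For context, F1 and EM here are the SQuAD-style answer-level metrics. Below is a simplified sketch of how they are computed per prediction/gold pair; it is not the official FQuAD scorer, which additionally strips French articles, so its numbers may differ slightly.)

import collections
import string

def normalize(text):
    # Lower-case, drop punctuation, collapse whitespace (SQuAD-style normalization).
    text = "".join(ch for ch in text.lower() if ch not in set(string.punctuation))
    return " ".join(text.split())

def f1_em(pred, gold):
    pred_toks = normalize(pred).split()
    gold_toks = normalize(gold).split()
    em = float(pred_toks == gold_toks)
    common = collections.Counter(pred_toks) & collections.Counter(gold_toks)
    overlap = sum(common.values())
    if overlap == 0:
        return 0.0, em
    precision = overlap / len(pred_toks)
    recall = overlap / len(gold_toks)
    f1 = 2 * precision * recall / (precision + recall)
    return f1, em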
Here is the part of the script I use to train and evaluate the model on the FQuAD dataset, with the same hyperparameters as the official model:
import numpy as np
import torch
import torch.nn as nn
from transformers import AutoModel
from tqdm import trange, tqdm_notebook

MAX_SEQ_LENGTH = 384
TRAIN_BATCH_SIZE = 12
n_epochs = 3
learning_rate = 3e-5
EVAL_BATCH_SIZE = 12
dropout = 0
BERT_TYPE = "fmikaelian/camembert-base-fquad"
class CamemBERTQA(nn.Module):
    def __init__(self, bert_type, hidden_size, num_labels):
        super(CamemBERTQA, self).__init__()
        self.bert_type = bert_type
        self.hidden_size = hidden_size
        self.num_labels = num_labels
        self.camembert = AutoModel.from_pretrained(self.bert_type)
        # Projects each token's hidden state to two scores: start and end.
        self.qa_outputs = nn.Linear(self.hidden_size, self.num_labels)

    def forward(self, input_ids):
        # Last hidden states: (batch, seq_len, hidden_size)
        output = self.camembert(input_ids=input_ids)[0]
        logits = self.qa_outputs(output)  # (batch, seq_len, 2)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1)  # (batch, seq_len)
        end_logits = end_logits.squeeze(-1)
        return (start_logits, end_logits)
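The snippet does not show how the model, optimizer, scheduler, and loss_func used below are set up; here is a minimal sketch under the stated hyperparameters, assuming train_dataloader and eval_dataloader exist as referenced in the loop. The loss mirrors the averaged start/end cross-entropy used by HuggingFace's QA heads, and get_linear_schedule_with_warmup is one plausible scheduler choice, not necessarily the original script's.

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Hidden size 768 for camembert-base; 2 labels for start/end.
model = CamemBERTQA(BERT_TYPE, hidden_size=768, num_labels=2).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Optional linear-warmup scheduler, common for BERT-style fine-tuning.
from transformers import get_linear_schedule_with_warmup
total_steps = len(train_dataloader) * n_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=total_steps)

def loss_func(start_and_end_scores, start_pos, end_pos):
    # Standard span-extraction loss: mean of start and end cross-entropies.
    start_logits, end_logits = start_and_end_scores
    ce = nn.CrossEntropyLoss()
    return (ce(start_logits, start_pos) + ce(end_logits, end_pos)) / 2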
def train_eval_model(model, n_epochs, scheduler=None):
    train_losses = []
    valid_losses = []
    avg_train_losses = []
    avg_valid_losses = []
    res = []
    for epoch in trange(n_epochs):
        ################################### train the model ###################################
        model.train()
        for batch, d in enumerate(tqdm_notebook(train_dataloader, desc="Iteration")):
            ids = d['ids'].to(device, dtype=torch.long)
            start_pos = d['start_pos'].to(device, dtype=torch.long)
            end_pos = d['end_pos'].to(device, dtype=torch.long)

            optimizer.zero_grad()
            start_and_end_scores = model(ids)  # forward pass returns (start_logits, end_logits)
            loss = loss_func(start_and_end_scores, start_pos, end_pos)
            loss.backward()
            optimizer.step()
            if scheduler is not None:
                scheduler.step()
            train_losses.append(loss.item())

        ################################### validate the model ###################################
        model.eval()
        pred_s = None
        pred_e = None
        for batch, d in enumerate(eval_dataloader):
            ids = d['ids'].to(device, dtype=torch.long)
            start_pos = d['start_pos'].to(device, dtype=torch.long)
            end_pos = d['end_pos'].to(device, dtype=torch.long)

            with torch.no_grad():
                start_and_end_scores = model(ids)
                loss = loss_func(start_and_end_scores, start_pos, end_pos)
                valid_losses.append(loss.item())

            # Accumulate logits over the whole validation set.
            if pred_s is None:
                pred_s = start_and_end_scores[0].detach().cpu().numpy()
                pred_e = start_and_end_scores[1].detach().cpu().numpy()
            else:
                pred_s = np.append(pred_s, start_and_end_scores[0].detach().cpu().numpy(), axis=0)
                pred_e = np.append(pred_e, start_and_end_scores[1].detach().cpu().numpy(), axis=0)

        # Predicted start/end token indices for every validation example.
        pred_start = np.argmax(pred_s, axis=1)
        pred_end = np.argmax(pred_e, axis=1)
        res.append([pred_start, pred_end])

        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)

        epoch_len = len(str(n_epochs))
        print(f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] '
              f'train_loss: {train_loss:.5f} '
              f'valid_loss: {valid_loss:.5f}')

        train_losses = []
        valid_losses = []

    return model, avg_train_losses, avg_valid_losses, res
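For completeness, the function would be driven along these lines, and the token indices in res must be mapped back to answer strings with the same tokenizer before scoring F1/EM. decode_span here is a hypothetical helper, and the exact alignment depends on how the examples were encoded.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(BERT_TYPE)

model, train_losses, valid_losses, res = train_eval_model(model, n_epochs, scheduler=scheduler)

def decode_span(input_ids, start_idx, end_idx):
    # Hypothetical helper: inclusive token span -> answer text.
    return tokenizer.decode(input_ids[start_idx:end_idx + 1], skip_special_tokens=True)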