0

我正在编写代码,训练 SBERT 模型来完成分类任务。当使用 lossname == 'cosinus' 运行代码时,会报 dtype 错误;而使用 lossname == 'rank' 运行时,代码一切正常。我不明白为什么会出现这个错误。如能提供任何帮助,我将不胜感激。

# Module-level training data loader, built from data_samples in batches of 16;
# sentrans_train() reads it as a global.
# NOTE(review): data_samples is not defined in the visible snippet — confirm
# where it is built and which label dtype its examples carry.
train_dataloader = datasets.NoDuplicatesDataLoader(data_samples, batch_size=16)

def sentrans_train(modelname_or_path="",
                   taskname="classifier",
                   lossname="cosinus",
                   train_path="train/*.csv",
                   val_path="val/*.csv",
                   metricname='cosinus',
                   dirout="mymodel_save/",
                   cc: dict = None):
    """Fine-tune a SentenceTransformer model with the selected training loss.

    The function loads the 'distilbert-base-nli-mean-tokens' model, builds a
    similarity evaluator from the 'dev' split of the STS benchmark file, picks
    the loss named by ``lossname`` and, for the 'classifier' task, runs
    ``model.fit`` on the module-level ``train_dataloader``.

    Args:
        modelname_or_path: Currently unused; the model name is hard-coded.
        taskname: Training only runs when this is 'classifier'.
        lossname: 'cosinus' selects CosineSimilarityLoss, 'rank' selects
            MultipleNegativesRankingLoss.
        train_path: Currently unused.
        val_path: Currently unused.
        metricname: Currently unused.
        dirout: Currently unused; the output path is hard-coded.
        cc: Configuration object read through the attributes ``cc.epoch`` and
            ``cc.warmup``. NOTE(review): the annotation says ``dict`` but a
            plain dict does not support attribute access — presumably an
            attribute-style dict/namespace is passed; confirm with callers.

    Raises:
        ValueError: If ``lossname`` is neither 'cosinus' nor 'rank'.
    """
    # Fail fast on an unsupported loss: otherwise train_loss would be left
    # unbound and the failure would only surface later, at model.fit().
    if lossname not in ('cosinus', 'rank'):
        raise ValueError(
            "Unsupported lossname: {!r} (expected 'cosinus' or 'rank')".format(lossname)
        )

    model = SentenceTransformer('distilbert-base-nli-mean-tokens')

    logging.info("Read STSbenchmark dev dataset")
    dev_samples = []
    # sts_dataset_path is expected to be defined at module level (not visible here).
    with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
        reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
        for row in reader:
            if row['split'] == 'dev':
                score = float(row['score']) / 5.0  # Normalize score to range 0 ... 1
                dev_samples.append(
                    InputExample(texts=[row['sentence1'], row['sentence2']], label=score)
                )

    dev_evaluator = EmbeddingSimilarityEvaluator.from_input_examples(
        dev_samples, batch_size=16, name='sts-dev'
    )

    if lossname == 'cosinus':
        # NOTE(review): CosineSimilarityLoss regresses the cosine similarity
        # against the example label, so the training examples presumably need
        # float labels; integer class labels are a likely cause of a dtype
        # mismatch here — confirm against how data_samples is built.
        train_loss = losses.CosineSimilarityLoss(model)
    else:  # lossname == 'rank'
        # Our training loss
        train_loss = losses.MultipleNegativesRankingLoss(model)

    # NOTE(review): the original paste had lost its indentation and this branch
    # had no body; the training steps are assumed to belong to it — confirm.
    if taskname == 'classifier':
        # Configure the training
        # 10% of train data for warm-up.
        warmup_steps = math.ceil(len(train_dataloader) * cc.epoch * 0.1)
        logging.info("Warmup-steps: {}".format(warmup_steps))

        print(len(train_dataloader))

        model_save_path = './cosinus/results'
        # NOTE(review): the computed warmup_steps above is only logged; fit()
        # receives cc.warmup, as in the original — confirm which is intended.
        model.fit(train_objectives=[(train_dataloader, train_loss)],
                  evaluator=dev_evaluator,
                  epochs=cc.epoch,
                  evaluation_steps=int(len(train_dataloader) * 0.1),
                  warmup_steps=cc.warmup,
                  output_path=model_save_path,
                  use_amp=False
                  )
# NOTE(review): test() is not defined in the visible snippet — confirm where
# it comes from and whether this call belongs at module level.
test()

'''

**运行时错误**

在此处输入图像描述

4

0 回答 0