当我运行 demo.py 时,代码如下:
from transformers import AutoTokenizer, AutoModel
# Load the multilingual DistilBERT tokenizer and model from the Hugging Face hub.
# NOTE(review): downloads weights on first run — requires network access.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-multilingual-cased")
model = AutoModel.from_pretrained("distilbert-base-multilingual-cased", return_dict=True)
# print(model)  # uncomment to inspect the model architecture
def count_parameters(model):
    """Return the number of trainable parameters in *model*.

    Only tensors with ``requires_grad=True`` are counted, so frozen
    layers are excluded from the total.
    """
    # Fix: the original `return` line was not indented, which raises
    # an IndentationError when the file is executed.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
# Report how many trainable parameters the loaded model has.
print(count_parameters(model))
# Tokenize a mixed Chinese/English sentence into a batch of PyTorch tensors
# (dict with 'input_ids' and 'attention_mask', batch size 1).
inputs = tokenizer("史密斯先生不在,他去看电影了。Mr Smith is not in. He ________ ________to the cinema", return_tensors="pt")
print(inputs)
# Forward pass; with return_dict=True the result is a ModelOutput mapping
# (last_hidden_state, etc.) rather than a plain tuple.
outputs = model(**inputs)
print(outputs)
代码显示
{'input_ids': tensor([[ 101, 2759, 3417, 4332, 2431, 5600, 2080, 3031, 10064, 2196,
2724, 5765, 5614, 3756, 2146, 1882, 12916, 11673, 10124, 10472,
10106, 119, 10357, 168, 168, 168, 168, 168, 168, 168,
168, 168, 168, 168, 168, 168, 168, 168, 168, 10114,
10105, 18458, 119, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
同时终端打印了两条警告:"使用 bos_token,但尚未设置。使用 eos_token,但尚未设置。" 请问为什么会出现这条关于 bos_token 的警告?它对结果有影响吗,需要处理吗?