I am using the code below to generate sentence embeddings with the Hugging Face transformers library, but I am getting the error shown further down and can't work out what is wrong. Any pointers would be appreciated. Thanks.
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
# Sentences we want sentence embeddings for
sentences = ['This is an example sentence', 'Each sentence is converted']
# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-roberta-large-v1')
model = AutoModel.from_pretrained('sentence-transformers/all-roberta-large-v1')
# Tokenize sentences
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
# Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)
# Perform pooling
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
# Normalize embeddings
sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
print("Sentence embeddings:")
print(sentence_embeddings)
print(torch.__version__)
>> 1.4.0
I get the following error:
RuntimeError                              Traceback (most recent call last)
~\anaconda3\envs\tf_env\lib\site-packages\transformers\modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
   1434             try:
-> 1435                 state_dict = torch.load(resolved_archive_file, map_location="cpu")
   1436             except Exception as e:

~\anaconda3\envs\tf_env\lib\site-packages\torch\serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
    526         if _is_zipfile(opened_file):
--> 527             with _open_zipfile_reader(f) as opened_zipfile:
    528                 return _load(opened_zipfile, map_location, pickle_module, **pickle_load_args)

~\anaconda3\envs\tf_env\lib\site-packages\torch\serialization.py in __init__(self, name_or_buffer)
    223     def __init__(self, name_or_buffer):
--> 224         super(_open_zipfile_reader, self).__init__(torch._C.PyTorchFileReader(name_or_buffer))
    225

RuntimeError: version_ <= kMaxSupportedFileFormatVersion INTERNAL ASSERT FAILED at ..\caffe2\serialize\inline_container.cc:132, please report a bug to PyTorch. Attempted to read a PyTorch file with version 3, but the maximum supported version for reading is 2. Your PyTorch installation may be too old. (init at ..\caffe2\serialize\inline_container.cc:132)
(no backtrace available)

During handling of the above exception, another exception occurred:

MemoryError                              Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_31400/3667175826.py in <module>
----> 1 model = AutoModel.from_pretrained('sentence-transformers/all-roberta-large-v1')
      2
      3 # Tokenize sentences
      4 encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
      5

~\anaconda3\envs\tf_env\lib\site-packages\transformers\models\auto\auto_factory.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
    445         elif type(config) in cls._model_mapping.keys():
    446             model_class = _get_model_class(config, cls._model_mapping)
--> 447             return model_class.from_pretrained(pretrained_model_name_or_path, *model_args, config=config, **kwargs)
    448         raise ValueError(
    449             f"Unrecognized configuration class {config.__class__} for this kind of AutoModel: {cls.__name__}.\n"

~\anaconda3\envs\tf_env\lib\site-packages\transformers\modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
   1437                 try:
   1438                     with open(resolved_archive_file) as f:
-> 1439                         if f.read().startswith("version"):
   1440                             raise OSError(
   1441                                 "You seem to have cloned a repository without having git-lfs installed. Please install "

~\anaconda3\envs\tf_env\lib\encodings\cp1252.py in decode(self, input, final)
     21 class IncrementalDecoder(codecs.IncrementalDecoder):
     22     def decode(self, input, final=False):
---> 23         return codecs.charmap_decode(input,self.errors,decoding_table)[0]
     24
     25 class StreamWriter(Codec,codecs.StreamWriter):

MemoryError:
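
For reference, the first traceback shows that torch.load took the zip-archive code path (_is_zipfile was true), and the failing assert compares the serialization format version stored inside that archive against the maximum my torch build can read. A rough way to look at that version record directly (the path below is just a placeholder for wherever transformers cached pytorch_model.bin on my machine) would be something like:

import zipfile

archive_path = r"C:\path\to\cached\pytorch_model.bin"  # placeholder: the checkpoint file transformers downloaded
with zipfile.ZipFile(archive_path) as zf:
    # a checkpoint saved with the newer zip-based format keeps its format number in a "version" record
    version_entry = next(name for name in zf.namelist() if name.endswith("/version"))
    print(zf.read(version_entry).decode().strip())  # anything above 2 is unreadable for torch 1.4.0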