我正在尝试为机器学习模型获取我的 GloVe 向量,而不必每次都加载模型。因此,我想将 glove_model 字典保存到 json 文件中,以便在初始构建后可以将其加载到其他地方。
但是我不断收到此错误:“TypeError:ndarray 类型的对象不是 JSON 可序列化的”-下面的完整错误。谢谢!
import numpy as np
import json
def make_glove_model():
'''
Load gloVe pre-trained vectors.
Dict keys = tokens (strings); values = word vectors (np arrays of length 50).
'''
filename = 'Data/glove_twitter_50d.txt'
print("gloVe vectors loading . . .")
with open(filename,'r', encoding='utf8') as foo:
gloveModel = {}
for line in foo:
splitLines = line.split()
word = splitLines[0]
wordEmbedding = np.array([float(value) for value in splitLines[1:]])
gloveModel[word] = wordEmbedding
# Get average of word vectors to be used for unseen words, per GloVe author
with open(filename, 'r', encoding='utf8') as foo:
for i, line in enumerate(foo):
pass
n_vec = i + 1
hidden_dim = len(line.split(' ')) - 1
vecs = np.zeros((n_vec, hidden_dim), dtype=np.float32)
with open(filename, 'r', encoding='utf8') as foo:
for i, line in enumerate(foo):
vecs[i] = np.array([float(n) for n in line.split(' ')[1:]], dtype=np.float32)
avg_vec = np.mean(vecs, axis=0)
print(len(gloveModel),"gloVe vectors loaded.")
return gloveModel, avg_vec
glove_model, avg_vec = make_glove_model()
with open('glove_model.json', 'w') as f:
json.dump(glove_model, f)
这会引发错误:
Traceback (most recent call last):
File "D:\Documents on D\Machine_Learning\Sentiment_Analysis\import_glove.py", line 43, in <module>
json.dump(glove_model, f)
File "D:\Anaconda\envs\tfgpu\lib\json\__init__.py", line 179, in dump
for chunk in iterable:
File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 405, in _iterencode_dict
yield from chunks
File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 438, in _iterencode
o = _default(o)
File "D:\Anaconda\envs\tfgpu\lib\json\encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type ndarray is not JSON serializable
然而,当我检查数据类型时,
type(glove_model)
Out[17]: dict
type(avg_vec)
Out[18]: numpy.ndarray