在我的Python Utilities Github 存储库中,我有一个函数可以从字符串、映射和序列中删除非打印字符和无效的 Unicode 字节:
def filterCharacters(s):
"""
Strip non printable characters
@type s dict|list|tuple|bytes|string
@param s Object to remove non-printable characters from
@rtype dict|list|tuple|bytes|string
@return An object that corresponds with the original object, nonprintable characters removed.
"""
validCategories = (
'Lu', 'Ll', 'Lt', 'LC', 'Lm', 'Lo', 'L', 'Mn', 'Mc', 'Me', 'M', 'Nd', 'Nl', 'No', 'N', 'Pc',
'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'P', 'Sm', 'Sc', 'Sk', 'So', 'S', 'Zs', 'Zl', 'Zp', 'Z'
)
convertToBytes = False
if isinstance(s, dict):
new = {}
for k,v in s.items(): # This is the offending line
new[k] = filterCharacters(v)
return new
if isinstance(s, list):
new = []
for item in s:
new.append(filterCharacters(item))
return new
if isinstance(s, tuple):
new = []
for item in s:
new.append(filterCharacters(item))
return tuple(new)
if isinstance(s, bytes):
s = s.decode('utf-8')
convertToBytes = True
if isinstance(s, str):
s = ''.join(c for c in s if unicodedata.category(c) in validCategories)
if convertToBytes:
s = s.encode('utf-8')
return s
else:
return None
有时这个函数会抛出异常:
Traceback (most recent call last):
File "./util.py", line 56, in filterCharacters
for k,v in s.items():
RuntimeError: dictionary changed size during iteration
我没有看到我在哪里更改作为参数发送的字典。那为什么会抛出这个异常呢?
谢谢!