Python 的 email 包可以获取电子邮件消息的全文,将其解析为多个部分并遍历各个部分。
但是,是否有一个库可以将 IMAP 协议返回的 BODYSTRUCTURE 响应解析为多个 email.message.Message 部分?
编辑:
PHP 中的等效方法是 imap_fetchbody(),它自动处理结构的解析。
编辑2:
这个问题被错误地关闭为重复。它要做的是将 BODYSTRUCTURE 嵌套表达式解析为有用的格式(例如字典),而不是将原始字符串解析为 Python 类型。无论如何,我最终写出了自己的解决方案,下面的代码供将来遇到类似问题的人参考。
它旨在与 imapclient 库一起使用。
# ----- Parsing BODYSTRUCTURE into parts dictionaries ----- #
def tuple2dict(pairs):
    """Build a dict from a flat ``(key, value, key, value, ...)`` tuple.

    Values that are themselves tuples are converted recursively with the
    same rule. A trailing key without a value is dropped, because keys and
    values are paired with ``zip``.

    Returns:
        The resulting dict, or ``None`` when ``pairs`` is empty or ``None``.
    """
    if not pairs:
        return None
    keys = pairs[::2]
    values = pairs[1::2]
    return {
        key: tuple2dict(value) if isinstance(value, tuple) else value
        for key, value in zip(keys, values)
    }
def parse_singlepart(var, part_no):
    """Convert a non-multipart BODYSTRUCTURE element into a dict.

    Args:
        var: Sequence of body fields in positional order: the seven basic
            fields (type, subtype, params, id, description, encoding,
            size), then the type-specific fields, then any extension
            fields (md5, disposition, language, location).
        part_no: Part number stored in the result under ``'part_no'``.

    Returns:
        A dict keyed by field name. ``'params'`` and ``'disposition'`` are
        converted from flat key/value tuples to dicts (``None`` when empty
        or absent). Extension keys other than ``'disposition'`` are present
        only if the corresponding field was supplied in ``var``.
    """
    # Basic fields for non-multipart (required).
    basic_fields = ['maintype', 'subtype', 'params', 'id', 'description',
                    'encoding', 'size']
    part = dict(zip(basic_fields, var[:7]), part_no=part_no)
    part['params'] = tuple2dict(part['params'])

    # Type-specific fields (required for 'message/rfc822' and 'text' types).
    index = 7
    maintype = part['maintype'].lower()
    if maintype == 'message' and part['subtype'].lower() == 'rfc822':
        part.update(zip(['envelope', 'bodystructure', 'lines'], var[7:10]))
        index = 10
    elif maintype == 'text':
        part['lines'] = var[7]
        index = 8

    # Extension fields for non-multipart (optional).
    part.update(zip(['md5', 'disposition', 'language', 'location'],
                    var[index:]))
    # The extension fields may be missing entirely, so look the key up with
    # .get() instead of indexing; tuple2dict(None) yields None.
    part['disposition'] = tuple2dict(part.get('disposition'))
    return part
def parse_multipart(var, part_no):
    """Convert a multipart BODYSTRUCTURE element into a dict.

    Two layouts of ``var`` are accepted:

    * ``var[0]`` is a list holding all child parts, followed by the
      subtype and optional extension fields;
    * ``var`` starts with a run of tuples (one per child part), followed
      by the subtype and optional extension fields.

    Args:
        var: The parsed multipart element in one of the layouts above.
        part_no: Part number of this multipart; children are numbered
            ``'<part_no>.<n>'`` with a ``'TEXT.'`` prefix stripped, so the
            children of a part numbered ``'TEXT'`` become ``'1'``, ``'2'``...

    Returns:
        A dict with ``'child_parts'`` (list of parsed children),
        ``'part_no'``, ``'params'`` and ``'disposition'`` (dicts or
        ``None``), plus ``'subtype'``, ``'language'`` and ``'location'``
        when those fields are present in ``var``.
    """
    part = {'child_parts': [], 'part_no': part_no}

    # First parse the child parts.
    index = 0
    if isinstance(var[0], list):
        part['child_parts'] = [
            parse_part(child,
                       ('%s.%d' % (part_no, i + 1)).replace('TEXT.', ''))
            for i, child in enumerate(var[0])
        ]
        index = 1
    elif isinstance(var[0], tuple):
        # Bounds check: stop cleanly if no subtype follows the child tuples
        # instead of running off the end of var.
        while index < len(var) and isinstance(var[index], tuple):
            child_no = ('%s.%d' % (part_no, index + 1)).replace('TEXT.', '')
            part['child_parts'].append(parse_part(var[index], child_no))
            index += 1

    # Then parse the required subtype field and optional extension fields.
    part.update(zip(['subtype', 'params', 'disposition', 'language',
                     'location'], var[index:]))
    # Extension fields may be absent, so use .get(); tuple2dict(None) is None.
    part['params'] = tuple2dict(part.get('params'))
    part['disposition'] = tuple2dict(part.get('disposition'))
    return part
def parse_part(var, part_no=None):
    """Parse an IMAP BODYSTRUCTURE element into a nested dictionary.

    An element whose first item is a tuple or list is handled as a
    multipart (default part number ``'TEXT'``); anything else is handled
    as a single part (default part number ``'1'``).

    See http://tools.ietf.org/html/rfc3501#section-6.4.5 for structure of email messages
    See http://tools.ietf.org/html/rfc3501#section-7.4.2 for specification of BODYSTRUCTURE
    """
    is_multipart = isinstance(var[0], (tuple, list))
    if is_multipart:
        return parse_multipart(var, part_no if part_no else 'TEXT')
    return parse_singlepart(var, part_no if part_no else '1')