为什么此脚本出现“无效语法”错误?我正在使用 Python 3.3.2 Shell,当我尝试运行该模块时,只会弹出一个“无效语法”。
import os, re, csv
import email, email.message, email.header
MAILDIR = 'mails'
FEATURES = ['from', 'domain', 'ip', 'country', 'content-type', 'charset', 'reply', 'recipients', 'images', 'urls']
try:
geoip = False
import pygeoip
gi = pygeoip.GeoIP('GeoIP.dat', pygeoip.MEMORY_CACHE)
geoip = True
except ImportError:
pass
except IOError:
print 'GeoIP.dat not found'
class Mail(email.message.Message):
def count_html_tag(self, tag):
tags = 0
for part in email.iterators.typed_subpart_iterator(self, 'text', 'html'):
html = part.get_payload(decode=True)
tags += html.lower().count('<' + tag)
return tags
def feature(self, feature):
# content-type: plain, html, multipart
if feature == 'content-type':
if self.is_multipart():
return 'multipart'
else:
return self.get_content_subtype()
# charset: list of charsets (if multipart)
elif feature == 'charset':
return ','.join(sorted(set(self.get_charsets()) - {None}))
# from: claimed email address of sender
elif feature == 'from':
addr = self.get('from', self.get('sender', ''))
return email.utils.parseaddr(addr)[1]
# domain: domain name of sender
elif feature == 'domain':
return self.feature('from').split('@', 1)[1]
# ip: possible ip address of sender
elif feature == 'ip':
if 'x-original-ip' in self:
return self['x-original-ip']
else:
for received in reversed(self.get_all('received')):
m = re.search('\[([0-9.]*)\]', received)
if m and m.group(1)[:3] not in ('127', '192', '172', '10.'):
return m.group(1)
else:
return ''
# country: country of sender, based on ip
elif feature == 'country':
if geoip:
ip = self.feature('ip')
return gi.country_code_by_addr(ip) if ip else ''
else:
return ''
# recipient: number of known recipients
elif feature == 'recipients':
fields = ('to', 'cc', 'resent-to', 'resent-cc')
return sum(len(email.utils.getaddresses(self.get_all(field, []))) for field in fields)
# reply: re, fw
elif feature == 'reply':
subject = email.header.decode_header(self.get('Subject', ''))[0][0]
m = re.match('(re|fw)d?:', subject.lower())
return m.group(1) if m else ''
# images: number of images in html
elif feature == 'images':
return self.count_html_tag('img')
# images: number of urls in html
elif feature == 'urls':
return self.count_html_tag('a')
else:
raise KeyError()
def features(self, lst=FEATURES):
return {feature: self.feature(feature) for feature in lst}
def main():
with open('features.csv', 'wb') as csvfile:
writer = csv.DictWriter(csvfile, FEATURES)
writer.writeheader()
for mpath in os.listdir(MAILDIR):
with open(os.path.join(MAILDIR, mpath)) as mfile:
mail = email.message_from_file(mfile, Mail)
writer.writerow(mail.features(FEATURES))
if __name__ == '__main__':
main()
提前致谢!我对 Python 有点陌生