以下是我的做法,供寻找类似解决方案的人参考。欢迎评论或提出其他方案:
import os
import re
import sys
import urllib
import xml.sax.saxutils as saxutils
from xml.sax.saxutils import unescape

import mechanize
# Inline the CSS rules of a newsletter e-mail into its HTML tags by posting
# the <body> of the source file to MailChimp's online inline-css form, then
# strip the class attributes and write the result next to the source file.
#
# Written for Python 2 (urllib.unquote_plus, byte-string file contents).

# The issue directory is assembled from three environment variables
# (presumably set by init.bat, as the error message suggests).
try:
    issueRoot = (os.environ['newslettersroot'] + os.environ['currYear'] + '/'
                 + os.environ['issueRoot'] + '/')
except KeyError:
    print("Please run init.bat")
    sys.exit(1)

srcEmailFilename = 'email.html'
dstEmailFilename = 'email_inline_css.html'

# retrieve <body> section only
with open(issueRoot + srcEmailFilename, 'rb') as fh:
    html = fh.read()
bodyMatch = re.search("(?si)<body.*?</body>", html)
if not bodyMatch:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("Expected to find a <body> section in "
             + issueRoot + srcEmailFilename)
html = bodyMatch.group(0)

# use mailchimp inlineCss site to inject class rules into html tags
response = mechanize.urlopen("http://beaker.mailchimp.com/inline-css")

# retrieve form (the first form on the page)
form = mechanize.ParseResponse(response, backwards_compat=False)[0]
form["html"] = html
# form["strip"] = "checked"

# submit form and retrieve result; the converted markup is expected inside
# the page's "text" textarea
html = mechanize.urlopen(form.click()).read()
match = re.search(
    '<textarea name="text" cols="100" rows="12">(.*?)</textarea>',
    html,
    re.DOTALL | re.IGNORECASE | re.MULTILINE)
if not match:
    # Dump the whole response so the unexpected page can be inspected.
    print(html)
    sys.exit("Expected to find output from mailchimp.")

# clean up output: the textarea content is HTML-escaped, and URL-encoded
# sequences are decoded as well.
# NOTE(review): unquote_plus runs over the entire document, so any literal
# '+' becomes a space and any '%XX' sequence is decoded -- confirm this is
# acceptable for the newsletter content.
html = match.group(1)
html = saxutils.unescape(html)  # &amp; &lt; &gt;
html = urllib.unquote_plus(html)
html = unescape(html, {"&apos;": "'", "&quot;": '"'})
# A second pass catches entities and URL escapes that were double-encoded.
html = html.replace('&amp;', '&').replace('%2F', '/').replace('%3A', ':')

# Earlier shell equivalent of the class-stripping step below:
# @sed -r 's/ class="[a-zA-Z0-9-]+"//g' %newslettersroot%%currYear%\%issueRoot%\email_inlinedcss.html > %newslettersroot%%currYear%\%issueRoot%\email_removedstyle.html
# remove class attributes
html = re.sub(r'(?sim)\s*class="[a-zA-Z0-9-]+"', "", html)

with open(issueRoot + dstEmailFilename, 'wb') as fh:
    fh.write(html)