@mattbasta 在这里的思路是正确的。不过,我想提出一个稍有不同的建议:使用 cElementTree.TreeBuilder 来实现 to_string。
我不确定 ElementTree 的超快序列化能否抵消构建 ElementTree 本身的开销。
下面是一个比较粗糙的 TAG 类,它的 to_string_b() 方法利用了一些微优化,并使用 TreeBuilder 来构建树。
(你的 to_string() 与 TreeBuilder 之间一个可能很重要的区别是:TreeBuilder 总会对输出进行 XML 转义,而你的实现不会。)
try:
    # C-accelerated module; it was removed in Python 3.9.
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET
class TAG(object):
    """A minimal markup element that can serialize itself.

    An element has a tag name, a dict of attributes and a sequence of
    children. Each child is either a piece of text or another ``TAG``.

    Two serializers are provided:

    * ``to_string`` builds the markup by hand and does NOT escape text or
      attribute values.
    * ``to_string_b`` feeds the tree to an ElementTree ``TreeBuilder`` and
      lets ElementTree serialize (and XML-escape) it.
    """

    # Types treated as text. ``basestring`` only exists on Python 2, so fall
    # back to ``str`` when it is missing.
    try:
        _STRING_TYPES = (basestring,)
    except NameError:
        _STRING_TYPES = (str,)

    def __init__(self, tag="TAG", contents=None, **attributes):
        """Create an element.

        Args:
            tag: Element name written between the angle brackets.
            contents: ``None``, a single string, or an iterable of strings
                and/or ``TAG`` instances.
            **attributes: Attribute names mapped to their values.
        """
        self.tag = tag
        # Ensure that `contents` always has a uniform (iterable) type.
        if contents is None:
            self.contents = []
        elif isinstance(contents, self._STRING_TYPES):
            # A bare string would otherwise be iterated character by
            # character, so wrap it in a list.
            self.contents = [contents]
        else:
            self.contents = contents
        self.attributes = attributes

    def to_string(self):
        """Yield the markup for this element piece by piece, unescaped.

        An element without contents is emitted in self-closing form
        (``<tag/>``). Children that are not ``TAG`` instances are yielded
        unchanged.
        """
        yield '<{}'.format(self.tag)
        for name, value in self.attributes.items():
            yield ' {}="{}"'.format(name, value)
        # `__init__` turns None into an empty list, so test for emptiness
        # rather than identity with None; otherwise this branch can never
        # run for a normally constructed element.
        if not self.contents:
            yield '/>'
            return
        yield '>'
        for child in self.contents:
            if isinstance(child, TAG):
                for piece in child.to_string():
                    yield piece
            else:
                yield child
        yield '</{}>'.format(self.tag)

    def to_string_b(self, builder=None):
        """Serialize this element through an ElementTree ``TreeBuilder``.

        Args:
            builder: Builder to add this element to. When omitted, this
                element is the root: a new builder is created and the
                finished tree is serialized.

        Yields:
            One string holding the whole serialized tree when called without
            a builder; nothing when a builder is supplied. It stays a
            generator only to mirror the interface of ``to_string``.
        """
        is_root = builder is None
        if is_root:
            builder = ET.TreeBuilder()
        builder.start(self.tag, self.attributes)
        # Micro-optimization: bind the per-child lookups to locals once.
        string_types = self._STRING_TYPES
        add_text = builder.data
        for child in self.contents or ():
            if isinstance(child, string_types):
                add_text(child)
            else:
                # Nested calls yield nothing; iterating just runs the
                # generator body so the child is added to the builder.
                for _ in child.to_string_b(builder):
                    pass
        builder.end(self.tag)
        if is_root:
            # Only the root closes the builder and flushes the result.
            markup = ET.tostring(builder.close())
            if not isinstance(markup, str):
                # Python 3 returns bytes here; the default encoding is
                # ASCII, so decoding gives the same text as a native str.
                markup = markup.decode('ascii')
            yield markup
class H1(TAG):
    """``TAG`` subclass whose element name is fixed to ``H1``."""

    def __init__(self, contents=None, **attributes):
        # Delegate to the base initializer with the tag name pinned.
        TAG.__init__(self, tag='H1', contents=contents, **attributes)
class H2(TAG):
    """``TAG`` subclass whose element name is fixed to ``H2``."""

    def __init__(self, contents=None, **attributes):
        # Delegate to the base initializer with the tag name pinned.
        TAG.__init__(self, tag='H2', contents=contents, **attributes)
# Demo: build a small tree and serialize it with both strategies.
tree = H1(["This is some ", H2("test input", id="abcd", cls="efgh"), " and trailing text"])
# A parenthesized single-argument print behaves the same as a Python 2 print
# statement and as the Python 3 print() function.
print(''.join(tree.to_string()))
print(''.join(tree.to_string_b()))