这是一个简短的示例片段——关键在于 SAX 解析器会在解析过程中逐步生成事件,因此您可以一直处理内容,直到文档出错中断的位置为止。
#!/usr/bin/env python
import sys
from xml.sax import SAXParseException, handler, make_parser
class TagHandler(handler.ContentHandler):
    """SAX content handler that tracks which elements are currently open.

    Every start tag pushes its element name onto ``self.stack`` and every
    end tag pops one entry, so at any moment the stack lists the elements
    that have been opened but not yet closed, outermost first.
    """

    def __init__(self):
        """Initialise the base handler and start with no open elements."""
        # Explicit base-class call (rather than super()) keeps this working
        # where ContentHandler is an old-style class.
        handler.ContentHandler.__init__(self)
        self.stack = []

    def startElement(self, name, attrs):
        """Record ``name`` as the innermost open element; ``attrs`` is unused."""
        self.stack.append(name)

    def endElement(self, name):
        """Drop the innermost open element from the stack."""
        # TODO: might want to confirm that ``name`` matches the top of the
        # stack here before popping.
        self.stack.pop()

    def finish_document(self):
        """Return the closing tags needed to close every open element.

        The tags are emitted innermost first, one per line. The result is
        an empty string when no element is open.
        """
        return "\n".join("</%s>" % tag for tag in reversed(self.stack))
# Parse the XML file named by the first command-line argument, tracking the
# open elements as SAX events arrive so a broken document can be closed off.
parser = make_parser()
# Named ``tag_handler`` so the imported ``handler`` module, which
# TagHandler.__init__ looks up at call time, is not shadowed.
tag_handler = TagHandler()
parser.setContentHandler(tag_handler)
try:
    parser.parse(sys.argv[1])
except SAXParseException:
    # Only parse failures are handled here; other errors (missing argument,
    # unreadable file, Ctrl-C) propagate instead of being silently swallowed.
    # TODO: something more intelligent than just printing out the
    # constructed end of the document. Like appending it to the source
    # and repeating whatever you did to make this processing necessary.
    # The single-argument call form of print works on Python 2 and 3 alike.
    print(tag_handler.finish_document())