XPath 非常适合处理这类问题。表达式 //TYPE[NUMBER='7721' and DATA]
会找到所有满足以下条件的 TYPE 节点:至少有一个文本为“7721”的 NUMBER 子节点,并且至少有一个 DATA 子节点:
from lxml import etree
# Sample document: two TYPE records under html/A/B/C/D. Only the second one
# has NUMBER == 7721.
xmlstr = """<html>
<A>
<B>
<C>
<D>
<TYPE>
<NUMBER>7297</NUMBER>
<DATA />
</TYPE>
<TYPE>
<NUMBER>7721</NUMBER>
<DATA>A=1,B=2,C=3,</DATA>
</TYPE>
</D>
</C>
</B>
</A>
</html>"""

# Parse the string; fromstring() returns the root <html> element.
html_element = etree.fromstring(xmlstr)

# Select every TYPE element that has a NUMBER child with text '7721' and at
# least one DATA child.
type_nodes = html_element.xpath("//TYPE[NUMBER='7721' and DATA]")

# Iterate rather than index: the XPath may match more than one TYPE element.
for t in type_nodes:
    # The XPath predicate already requires a DATA child, so find() returns
    # an element here (the first DATA child).
    d = t.find('DATA')
    # Example edit: append 'spamandeggs' to the DATA text. The original
    # script special-cased d.text being None, so fall back to '' before
    # concatenating.
    d.text = (d.text or '') + 'spamandeggs'

# Use print() as a function so the script runs on Python 3 as well as
# Python 2. tostring() returns bytes on Python 3 (ASCII-encoded by default),
# so decode it to print the markup itself rather than a b'...' repr.
print(etree.tostring(html_element).decode("ascii"))
输出:
<html>
<A>
<B>
<C>
<D>
<TYPE>
<NUMBER>7297</NUMBER>
<DATA/>
</TYPE>
<TYPE>
<NUMBER>7721</NUMBER>
<DATA>A=1,B=2,C=3,spamandeggs</DATA>
</TYPE>
</D>
</C>
</B>
</A>
</html>