遗憾的是,当前版本的 lxml 不再包含此功能。但是,我使用简单的自定义加载程序开发了一种解决方法。这是一个测试脚本,它演示了上述方法中的错误以及解决方法。请注意,此方法仅更新被包含文档根元素的 xml:base 属性。
程序的输出(使用 Python 3.9.1,lxml 4.6.3):
Included file was source.xml; xinclude reports it as document.xml
Included file was source.xml; workaround reports it as source.xml
# Includes
# ========
from pathlib import Path
from textwrap import dedent
from lxml import etree as ElementTree
from lxml import ElementInclude
# Setup
# =====
# Create a sample document, taken from the `Python stdlib
# <https://docs.python.org/3/library/xml.etree.elementtree.html#id3>`_...
#
# The backslash after the opening quotes keeps the XML declaration on the
# very first line of the file, and ``dedent`` strips the source indentation.
Path("document.xml").write_text(
    dedent(
        """\
        <?xml version="1.0"?>
        <document xmlns:xi="http://www.w3.org/2001/XInclude">
            <xi:include href="source.xml" parse="xml" />
        </document>
        """
    ),
    encoding="utf-8",
)

# ...and the associated include file.
Path("source.xml").write_text(
    "<para>This is a paragraph.</para>", encoding="utf-8"
)
# Failing xinclude case
# =====================
# Load and xinclude this.
tree = ElementTree.parse("document.xml")
# Expand the ``xi:include`` elements in place. Without this call,
# ``root[0]`` below would still be the unexpanded ``xi:include`` element
# rather than the included ``<para>``.
tree.xinclude()

# Show that the ``base`` attribute refers to the top-level
# ``document.xml``, instead of the xincluded ``source.xml``.
root = tree.getroot()
print(f"Included file was source.xml; xinclude reports it as {root[0].base}")
# Workaround
# ==========
# As a workaround, define a loader which sets the ``xml:base`` of an
# xincluded element. While lxml evidently used to do this, a change
# eliminated this ability per some `discussion
# <https://mail.gnome.org/archives/xml/2014-April/msg00015.html>`_,
# which included a rejected patch fixing this problem. `Current source
# <https://github.com/GNOME/libxml2/blob/master/xinclude.c#L1689>`_
# lacks this patch.
def my_loader(href, parse, encoding=None, parser=None):
    """Load an XInclude target and record its origin in ``xml:base``.

    Delegates the actual loading to lxml's default loader and then, for
    XML includes, sets the ``xml:base`` attribute of the loaded root
    element to ``href``. Only that root element is modified.

    Args:
        href: Location of the resource named by the ``xi:include`` element.
        parse: XInclude parse mode (``"xml"`` or ``"text"``).
        encoding: Optional text encoding, passed through to the default
            loader.
        parser: Optional XML parser, passed through to the default loader.

    Returns:
        The value produced by lxml's default loader. For ``parse="xml"``
        this is the included root element, now carrying ``xml:base``; for
        any other parse mode the result is returned unchanged.
    """
    ret = ElementInclude._lxml_default_loader(href, parse, encoding, parser)
    # Only XML includes yield an element. For text includes the default
    # loader returns decoded text, which has no ``attrib`` mapping.
    if parse == "xml":
        ret.attrib["{http://www.w3.org/XML/1998/namespace}base"] = href
    return ret
# Parse a fresh copy of the document so the workaround starts from the
# unexpanded ``xi:include`` element.
new_tree = ElementTree.parse("document.xml")
new_root = new_tree.getroot()

# Expand the includes through the custom loader instead of the built-in
# ``xinclude()`` processing.
ElementInclude.include(new_tree, loader=my_loader)

# Report the ``base`` of the first child of the root after expansion.
print(f"Included file was source.xml; workaround reports it as {new_root[0].base}")