0

我在使用 Bandit 进行安全扫描时遇到了如下报错:使用 lxml.etree.parse 解析不受信任的 XML 数据已知容易受到 XML 攻击,应将 lxml.etree.parse 替换为 defusedxml 中的等效函数。

我希望把下面的代码改写为使用 defusedxml 的等效实现。

from lxml import etree, objectify
def fn_read_xml_root(xml_file):
    """
    Read an XML file and return its raw text plus a namespace-free root.

    The file is read once as UTF-8 text (returned unchanged) and parsed
    once with lxml.  Namespace prefixes in Clark notation (``{uri}name``)
    are stripped from every element tag, and objectify annotations are
    removed from the tree.

    xml_file : path of the xml file to be parsed
    returns  : tuple ``(xml_data, xroot)`` - file text and root element
    """
    with open(xml_file, "r", encoding="utf-8") as x_file:
        xml_data = x_file.read()

    # lxml expands entities by default; the input is treated as untrusted,
    # so entity resolution and network lookups are switched off explicitly
    # to limit XXE / entity-expansion attacks.
    parser = etree.XMLParser(
        remove_blank_text=True,
        resolve_entities=False,
        no_network=True,
    )
    xroot = etree.parse(xml_file, parser).getroot()

    # iter() replaces the deprecated getiterator() alias.
    for elem in xroot.iter():
        # Comments, processing instructions and unresolved entity nodes
        # expose a non-string tag; only real elements are renamed.
        if not isinstance(elem.tag, str):
            continue
        idx = elem.tag.find("}")
        if idx >= 0:
            elem.tag = elem.tag[idx + 1:]

    objectify.deannotate(xroot, cleanup_namespaces=True)
    # return xml data and root node of the file
    return xml_data, xroot
4

1 回答 1

0
def remove_namespace(elem):
    """
    Strip a leading ``{namespace}`` prefix from a tag or attribute name.

    elem    : name string, possibly in ``{uri}local`` form
    returns : the text after the first ``}`` when the name starts with
              ``{``; otherwise the name unchanged
    """
    if not elem.startswith("{"):
        return elem
    # When no closing brace exists find() yields -1, so the slice starts
    # at 0 and the name comes back unchanged.
    closing = elem.find("}")
    return elem[closing + 1:]


def remove_all_namespaces(doc):
    """
    Strip namespace prefixes from every tag and attribute name in a tree.

    The tree is modified in place and the same object is returned.

    doc     : xml element whose subtree (including itself) is processed
    returns : the same ``doc`` element
    """
    for node in doc.iter():
        node.tag = remove_namespace(node.tag)
        # Rebuild the attribute mapping with prefix-free keys.
        stripped = {}
        for name, value in node.attrib.items():
            stripped[remove_namespace(name)] = value
        node.attrib = stripped
    return doc


def fn_read_xml_root(xml_file):
    """
    Read an XML file and return its raw text plus a namespace-free root.

    The file is read as UTF-8 text and parsed separately via ``ET.parse``.
    Namespace removal is best-effort: if it raises, the error is logged
    and the root is returned in whatever state it was left.

    xml_file : path of the xml file to be parsed
    returns  : tuple ``(xml_data, xroot)`` - file text and root element
    """
    with open(xml_file, "r", encoding="utf-8") as handle:
        raw_text = handle.read()

    tree = ET.parse(xml_file)
    root_node = tree.getroot()
    try:
        root_node = remove_all_namespaces(root_node)
    except Exception as exp:
        logging.info(f"XML namespace remove error {str(exp)}")

    # hand back both the file text and the parsed root
    return raw_text, root_node
于 2021-09-20T05:38:27.383 回答