0

我发现了一个案例:某个特定的(但格式正确的)XML 结构似乎会影响 iterparse 的行为。

import xml.etree.ElementTree as ET

# Print the number of direct children of every <proto name="ip"> element,
# first using a full parse() and then using an incremental iterparse().
print('Parse')
tree = ET.parse('file')
pdml = tree.getroot()
for packet in pdml:
    for proto in packet:
        if proto.get('name') == 'ip':
            # len(element) is the number of direct children; it replaces
            # Element.getchildren(), which was removed in Python 3.9.
            print(len(proto))

print('Iterparse')
for event, elem in ET.iterparse('file', events=('start', 'end')):
    # NOTE: on a 'start' event only the opening tag is guaranteed to have
    # been read. Which children are already attached depends on how much
    # input the parser has consumed so far, so this count may be lower than
    # the element's final child count.
    if event == 'start' and elem.get('name') == 'ip':
        print(len(elem))

结果是:

Parse
18
18
Iterparse
18
3

iterparse 返回的子节点数量与 parse 返回的不同,这看起来很奇怪。

XML 文件有点长(如果我删除其中一些节点,iterparse 返回的值就会与 parse 的相同):

<?xml version="1.0"?>
<pdml>
<packet>

  <proto name="frame" showname="Frame 1 (121 bytes on wire, 121 bytes captured)" size="121" pos="0">
    <field name="frame.time" showname="Arrival Time: Aug 19, 2017 19:09:14.445697000" size="0" pos="0" show="Aug 19, 2017 19:09:14.445697000"/>
    <field name="frame.time_delta" showname="Time delta from previous captured frame: 0.000000000 seconds" size="0" pos="0" show="0.000000000"/>
    <field name="frame.time_delta_displayed" showname="Time delta from previous displayed frame: 0.000000000 seconds" size="0" pos="0" show="0.000000000"/>
    <field name="frame.time_relative" showname="Time since reference or first frame: 0.000000000 seconds" size="0" pos="0" show="0.000000000"/>
    <field name="frame.number" showname="Frame Number: 1" size="0" pos="0" show="1"/>
    <field name="frame.len" showname="Frame Length: 121 bytes" size="0" pos="0" show="121"/>
    <field name="frame.cap_len" showname="Capture Length: 121 bytes" size="0" pos="0" show="121"/>
    <field name="frame.marked" showname="Frame is marked: False" size="0" pos="0" show="0"/>
    <field name="frame.protocols" showname="Protocols in frame: eth:ip:udp:snmp" size="0" pos="0" show="eth:ip:udp:snmp"/>
    <field name="frame.coloring_rule.name" showname="Coloring Rule Name: UDP" size="0" pos="0" show="UDP"/>
    <field name="frame.coloring_rule.string" showname="Coloring Rule String: udp" size="0" pos="0" show="udp"/>
  </proto>
  <proto name="eth" showname="Ethernet II, Src: Azurewav_bf:17:43 (00:25:d3:bf:17:43), Dst: 54:67:51:7e:c1:ab (54:67:51:7e:c1:ab)" size="14" pos="0">
    <field name="eth.dst" showname="Destination: 54:67:51:7e:c1:ab (54:67:51:7e:c1:ab)" size="6" pos="0" show="54:67:51:7e:c1:ab" value="5467517ec1ab">
      <field name="eth.addr" showname="Address: 54:67:51:7e:c1:ab (54:67:51:7e:c1:ab)" size="6" pos="0" show="54:67:51:7e:c1:ab" value="5467517ec1ab"/>
      <field name="eth.ig" showname=".... ...0 .... .... .... .... = IG bit: Individual address (unicast)" size="3" pos="0" show="0" value="0" unmaskedvalue="546751"/>
      <field name="eth.lg" showname=".... ..0. .... .... .... .... = LG bit: Globally unique address (factory default)" size="3" pos="0" show="0" value="0" unmaskedvalue="546751"/>
    </field>
    <field name="eth.src" showname="Source: Azurewav_bf:17:43 (00:25:d3:bf:17:43)" size="6" pos="6" show="00:25:d3:bf:17:43" value="0025d3bf1743">
      <field name="eth.addr" showname="Address: Azurewav_bf:17:43 (00:25:d3:bf:17:43)" size="6" pos="6" show="00:25:d3:bf:17:43" value="0025d3bf1743"/>
      <field name="eth.ig" showname=".... ...0 .... .... .... .... = IG bit: Individual address (unicast)" size="3" pos="6" show="0" value="0" unmaskedvalue="0025d3"/>
      <field name="eth.lg" showname=".... ..0. .... .... .... .... = LG bit: Globally unique address (factory default)" size="3" pos="6" show="0" value="0" unmaskedvalue="0025d3"/>
    </field>

  </proto>
  <proto name="ip" showname="Internet Protocol, Src: 192.168.0.52 (192.168.0.52), Dst: 10.0.50.249 (10.0.50.249)" size="20" pos="14">
    <field name="ip.version" showname="Version: 4" size="1" pos="14" show="4" value="45"/>
    <field name="ip.hdr_len" showname="Header length: 20 bytes" size="1" pos="14" show="20" value="45"/>
    <field name="ip.dsfield" showname="Differentiated Services Field: 0x00 (DSCP 0x00: Default; ECN: 0x00)" size="1" pos="15" show="0" value="00">
      <field name="ip.dsfield.dscp" showname="0000 00.. = Differentiated Services Codepoint: Default (0x00)" size="1" pos="15" show="0x00" value="0" unmaskedvalue="00"/>
      <field name="ip.dsfield.ect" showname=".... ..0. = ECN-Capable Transport (ECT): 0" size="1" pos="15" show="0" value="0" unmaskedvalue="00"/>
      <field name="ip.dsfield.ce" showname=".... ...0 = ECN-CE: 0" size="1" pos="15" show="0" value="0" unmaskedvalue="00"/>
    </field>
    <field name="ip.len" showname="Total Length: 107" size="2" pos="16" show="107" value="006b"/>
    <field name="ip.id" showname="Identification: 0x7b6a (31594)" size="2" pos="18" show="0x7b6a" value="7b6a"/>
    <field name="ip.flags" showname="Flags: 0x00" size="1" pos="20" show="0x00" value="00">
      <field name="ip.flags.rb" showname="0.. = Reserved bit: Not Set" size="1" pos="20" show="0" value="0" unmaskedvalue="00"/>
      <field name="ip.flags.df" showname=".0. = Don&apos;t fragment: Not Set" size="1" pos="20" show="0" value="0" unmaskedvalue="00"/>
      <field name="ip.flags.mf" showname="..0 = More fragments: Not Set" size="1" pos="20" show="0" value="0" unmaskedvalue="00"/>
    </field>
    <field name="ip.frag_offset" showname="Fragment offset: 0" size="2" pos="20" show="0" value="0000"/>
    <field name="ip.ttl" showname="Time to live: 128" size="1" pos="22" show="128" value="80"/>
    <field name="ip.proto" showname="Protocol: UDP (0x11)" size="1" pos="23" show="0x11" value="11"/>
    <field name="ip.checksum" showname="Header checksum: 0xc142 [validation disabled]" size="2" pos="24" show="0xc142" value="c142">
      <field name="ip.checksum_good" showname="Good: False" size="2" pos="24" show="0" value="c142"/>
      <field name="ip.checksum_bad" showname="Bad : False" size="2" pos="24" show="0" value="c142"/>
    </field>
    <field name="ip.src" showname="Source: 192.168.0.52 (192.168.0.52)" size="4" pos="26" show="192.168.0.52" value="c0a80034"/>
    <field name="ip.addr" showname="Source or Destination Address: 192.168.0.52 (192.168.0.52)" hide="yes" size="4" pos="26" show="192.168.0.52" value="c0a80034"/>
    <field name="ip.src_host" showname="Source Host: 192.168.0.52" hide="yes" size="4" pos="26" show="192.168.0.52" value="c0a80034"/>
    <field name="ip.host" showname="Source or Destination Host: 192.168.0.52" hide="yes" size="4" pos="26" show="192.168.0.52" value="c0a80034"/>
    <field name="ip.dst" showname="Destination: 10.0.50.249 (10.0.50.249)" size="4" pos="30" show="10.0.50.249" value="0a0032f9"/>
    <field name="ip.addr" showname="Source or Destination Address: 10.0.50.249 (10.0.50.249)" hide="yes" size="4" pos="30" show="10.0.50.249" value="0a0032f9"/>
    <field name="ip.dst_host" showname="Destination Host: 10.0.50.249" hide="yes" size="4" pos="30" show="10.0.50.249" value="0a0032f9"/>
    <field name="ip.host" showname="Source or Destination Host: 10.0.50.249" hide="yes" size="4" pos="30" show="10.0.50.249" value="0a0032f9"/>
  </proto>
  <proto name="udp" showname="User Datagram Protocol, Src Port: 63340 (63340), Dst Port: snmp (161)" size="8" pos="34">
    <field name="udp.srcport" showname="Source port: 63340 (63340)" size="2" pos="34" show="63340" value="f76c"/>
    <field name="udp.dstport" showname="Destination port: snmp (161)" size="2" pos="36" show="161" value="00a1"/>
    <field name="udp.port" showname="Source or Destination Port: 63340" hide="yes" size="2" pos="34" show="63340" value="f76c"/>
    <field name="udp.port" showname="Source or Destination Port: 161" hide="yes" size="2" pos="36" show="161" value="00a1"/>
    <field name="udp.length" showname="Length: 87" size="2" pos="38" show="87" value="0057"/>
    <field name="udp.checksum_coverage" showname="Checksum coverage: 87" hide="yes" size="0" pos="38" show="87"/>
    <field name="udp.checksum" showname="Checksum: 0x2241 [validation disabled]" size="2" pos="40" show="0x2241" value="2241">
      <field name="udp.checksum_good" showname="Good Checksum: False" size="2" pos="40" show="0" value="2241"/>
      <field name="udp.checksum_bad" showname="Bad Checksum: False" size="2" pos="40" show="0" value="2241"/>
    </field>
  </proto>
  <proto name="snmp" showname="Simple Network Management Protocol" size="74" pos="47">
    <field name="snmp.version" showname="version: version-1 (0)" size="1" pos="46" show="0" value="00"/>
    <field name="snmp.community" showname="community: public" size="6" pos="49" show="public" value="7075626c6963"/>
    <field name="snmp.data" showname="data: get-request (0)" size="66" pos="55" show="0" value="a040020300dbbd0201000201003033300f060b2b060102011903020105010500300f060b2b060102011903050101010500300f060b2b060102011903050102010500">
      <field name="snmp.get_request" showname="get-request" size="64" pos="57" show="" value="">
        <field name="snmp.request_id" showname="request-id: 56253" size="3" pos="59" show="56253" value="00dbbd"/>
        <field name="snmp.error_status" showname="error-status: noError (0)" size="1" pos="64" show="0" value="00"/>
        <field name="snmp.error_index" showname="error-index: 0" size="1" pos="67" show="0" value="00"/>
        <field name="snmp.variable_bindings" showname="variable-bindings: 3 items" size="51" pos="70" show="3" value="300f060b2b060102011903020105010500300f060b2b060102011903050101010500300f060b2b060102011903050102010500">
          <field name="" show="1.3.6.1.2.1.25.3.2.1.5.1: Value (Null)" size="17" pos="70" value="300f060b2b060102011903020105010500">
            <field name="snmp.name" showname="Object Name: 1.3.6.1.2.1.25.3.2.1.5.1 (iso.3.6.1.2.1.25.3.2.1.5.1)" size="11" pos="74" show="1.3.6.1.2.1.25.3.2.1.5.1" value="2b06010201190302010501"/>
            <field name="snmp.value.null" showname="Value (Null)" size="0" pos="87" show="" value="">
              <proto name="expert" showname="Expert Info (Note/Undecoded): Unresolved value, Missing MIB" size="0" pos="0">
                <field name="expert.message" showname="Message: Unresolved value, Missing MIB" size="0" pos="0" show="Unresolved value, Missing MIB"/>
                <field name="expert.severity" showname="Severity level: Note" size="0" pos="0" show="Note"/>
                <field name="expert.group" showname="Group: Undecoded" size="0" pos="0" show="Undecoded"/>
              </proto>
            </field>
          </field>
          <field name="" show="1.3.6.1.2.1.25.3.5.1.1.1: Value (Null)" size="17" pos="87" value="300f060b2b060102011903050101010500">
            <field name="snmp.name" showname="Object Name: 1.3.6.1.2.1.25.3.5.1.1.1 (iso.3.6.1.2.1.25.3.5.1.1.1)" size="11" pos="91" show="1.3.6.1.2.1.25.3.5.1.1.1" value="2b06010201190305010101"/>
            <field name="snmp.value.null" showname="Value (Null)" size="0" pos="104" show="" value="">
              <proto name="expert" showname="Expert Info (Note/Undecoded): Unresolved value, Missing MIB" size="0" pos="0">
                <field name="expert.message" showname="Message: Unresolved value, Missing MIB" size="0" pos="0" show="Unresolved value, Missing MIB"/>
                <field name="expert.severity" showname="Severity level: Note" size="0" pos="0" show="Note"/>
                <field name="expert.group" showname="Group: Undecoded" size="0" pos="0" show="Undecoded"/>
              </proto>
            </field>
          </field>
          <field name="" show="1.3.6.1.2.1.25.3.5.1.2.1: Value (Null)" size="17" pos="104" value="300f060b2b060102011903050102010500">
            <field name="snmp.name" showname="Object Name: 1.3.6.1.2.1.25.3.5.1.2.1 (iso.3.6.1.2.1.25.3.5.1.2.1)" size="11" pos="108" show="1.3.6.1.2.1.25.3.5.1.2.1" value="2b06010201190305010201"/>
            <field name="snmp.value.null" showname="Value (Null)" size="0" pos="121" show="" value="">
              <proto name="expert" showname="Expert Info (Note/Undecoded): Unresolved value, Missing MIB" size="0" pos="0">
                <field name="expert.message" showname="Message: Unresolved value, Missing MIB" size="0" pos="0" show="Unresolved value, Missing MIB"/>
                <field name="expert.severity" showname="Severity level: Note" size="0" pos="0" show="Note"/>
                <field name="expert.group" showname="Group: Undecoded" size="0" pos="0" show="Undecoded"/>
              </proto>
            </field>
          </field>
        </field>
      </field>
    </field>
  </proto>
</packet>

<packet>
  <proto name="geninfo" pos="0" showname="General information" size="121">
    <field name="num" pos="0" show="2" showname="Number" value="2" size="121"/>
    <field name="len" pos="0" show="121" showname="Frame Length" value="79" size="121"/>
    <field name="caplen" pos="0" show="121" showname="Captured Length" value="79" size="121"/>
    <field name="timestamp" pos="0" show="Aug 19, 2017 19:09:14.447761000" showname="Captured Time" value="1503162554.447761000" size="121"/>
  </proto>
  <proto name="frame" showname="Frame 2 (121 bytes on wire, 121 bytes captured)" size="121" pos="0">
    <field name="frame.time" showname="Arrival Time: Aug 19, 2017 19:09:14.447761000" size="0" pos="0" show="Aug 19, 2017 19:09:14.447761000"/>
    <field name="frame.time_delta" showname="Time delta from previous captured frame: 0.002064000 seconds" size="0" pos="0" show="0.002064000"/>
    <field name="frame.time_delta_displayed" showname="Time delta from previous displayed frame: 0.002064000 seconds" size="0" pos="0" show="0.002064000"/>
    <field name="frame.time_relative" showname="Time since reference or first frame: 0.002064000 seconds" size="0" pos="0" show="0.002064000"/>
    <field name="frame.number" showname="Frame Number: 2" size="0" pos="0" show="2"/>
    <field name="frame.len" showname="Frame Length: 121 bytes" size="0" pos="0" show="121"/>
    <field name="frame.cap_len" showname="Capture Length: 121 bytes" size="0" pos="0" show="121"/>
    <field name="frame.marked" showname="Frame is marked: False" size="0" pos="0" show="0"/>
    <field name="frame.protocols" showname="Protocols in frame: eth:ip:udp:snmp" size="0" pos="0" show="eth:ip:udp:snmp"/>
    <field name="frame.coloring_rule.name" showname="Coloring Rule Name: UDP" size="0" pos="0" show="UDP"/>
    <field name="frame.coloring_rule.string" showname="Coloring Rule String: udp" size="0" pos="0" show="udp"/>
  </proto>
  <proto name="eth" showname="Ethernet II, Src: Azurewav_bf:17:43 (00:25:d3:bf:17:43), Dst: 54:67:51:7e:c1:ab (54:67:51:7e:c1:ab)" size="14" pos="0">
    <field name="eth.dst" showname="Destination: 54:67:51:7e:c1:ab (54:67:51:7e:c1:ab)" size="6" pos="0" show="54:67:51:7e:c1:ab" value="5467517ec1ab">
      <field name="eth.addr" showname="Address: 54:67:51:7e:c1:ab (54:67:51:7e:c1:ab)" size="6" pos="0" show="54:67:51:7e:c1:ab" value="5467517ec1ab"/>
      <field name="eth.ig" showname=".... ...0 .... .... .... .... = IG bit: Individual address (unicast)" size="3" pos="0" show="0" value="0" unmaskedvalue="546751"/>
      <field name="eth.lg" showname=".... ..0. .... .... .... .... = LG bit: Globally unique address (factory default)" size="3" pos="0" show="0" value="0" unmaskedvalue="546751"/>
    </field>
    <field name="eth.src" showname="Source: Azurewav_bf:17:43 (00:25:d3:bf:17:43)" size="6" pos="6" show="00:25:d3:bf:17:43" value="0025d3bf1743">
      <field name="eth.addr" showname="Address: Azurewav_bf:17:43 (00:25:d3:bf:17:43)" size="6" pos="6" show="00:25:d3:bf:17:43" value="0025d3bf1743"/>
      <field name="eth.ig" showname=".... ...0 .... .... .... .... = IG bit: Individual address (unicast)" size="3" pos="6" show="0" value="0" unmaskedvalue="0025d3"/>
      <field name="eth.lg" showname=".... ..0. .... .... .... .... = LG bit: Globally unique address (factory default)" size="3" pos="6" show="0" value="0" unmaskedvalue="0025d3"/>
    </field>
  </proto>
  <proto name="ip" showname="Internet Protocol, Src: 192.168.0.52 (192.168.0.52), Dst: 192.168.1.103 (192.168.1.103)" size="20" pos="14">
    <field name="ip.version" showname="Version: 4" size="1" pos="14" show="4" value="45"/>
    <field name="ip.hdr_len" showname="Header length: 20 bytes" size="1" pos="14" show="20" value="45"/>
    <field name="ip.dsfield" showname="Differentiated Services Field: 0x00 (DSCP 0x00: Default; ECN: 0x00)" size="1" pos="15" show="0" value="00">
      <field name="ip.dsfield.dscp" showname="0000 00.. = Differentiated Services Codepoint: Default (0x00)" size="1" pos="15" show="0x00" value="0" unmaskedvalue="00"/>
      <field name="ip.dsfield.ect" showname=".... ..0. = ECN-Capable Transport (ECT): 0" size="1" pos="15" show="0" value="0" unmaskedvalue="00"/>
      <field name="ip.dsfield.ce" showname=".... ...0 = ECN-CE: 0" size="1" pos="15" show="0" value="0" unmaskedvalue="00"/>
    </field>
    <field name="ip.len" showname="Total Length: 107" size="2" pos="16" show="107" value="006b"/>
    <field name="ip.id" showname="Identification: 0x7b6b (31595)" size="2" pos="18" show="0x7b6b" value="7b6b"/>
    <field name="ip.flags" showname="Flags: 0x00" size="1" pos="20" show="0x00" value="00">
      <field name="ip.flags.rb" showname="0.. = Reserved bit: Not Set" size="1" pos="20" show="0" value="0" unmaskedvalue="00"/>
      <field name="ip.flags.df" showname=".0. = Don&apos;t fragment: Not Set" size="1" pos="20" show="0" value="0" unmaskedvalue="00"/>
      <field name="ip.flags.mf" showname="..0 = More fragments: Not Set" size="1" pos="20" show="0" value="0" unmaskedvalue="00"/>
    </field>
    <field name="ip.frag_offset" showname="Fragment offset: 0" size="2" pos="20" show="0" value="0000"/>
    <field name="ip.ttl" showname="Time to live: 128" size="1" pos="22" show="128" value="80"/>
    <field name="ip.proto" showname="Protocol: UDP (0x11)" size="1" pos="23" show="0x11" value="11"/>
    <field name="ip.checksum" showname="Header checksum: 0x3c2b [validation disabled]" size="2" pos="24" show="0x3c2b" value="3c2b">
      <field name="ip.checksum_good" showname="Good: False" size="2" pos="24" show="0" value="3c2b"/>
      <field name="ip.checksum_bad" showname="Bad : False" size="2" pos="24" show="0" value="3c2b"/>
    </field>
    <field name="ip.src" showname="Source: 192.168.0.52 (192.168.0.52)" size="4" pos="26" show="192.168.0.52" value="c0a80034"/>
    <field name="ip.addr" showname="Source or Destination Address: 192.168.0.52 (192.168.0.52)" hide="yes" size="4" pos="26" show="192.168.0.52" value="c0a80034"/>
    <field name="ip.src_host" showname="Source Host: 192.168.0.52" hide="yes" size="4" pos="26" show="192.168.0.52" value="c0a80034"/>
    <field name="ip.host" showname="Source or Destination Host: 192.168.0.52" hide="yes" size="4" pos="26" show="192.168.0.52" value="c0a80034"/>
    <field name="ip.dst" showname="Destination: 192.168.1.103 (192.168.1.103)" size="4" pos="30" show="192.168.1.103" value="c0a80167"/>
    <field name="ip.addr" showname="Source or Destination Address: 192.168.1.103 (192.168.1.103)" hide="yes" size="4" pos="30" show="192.168.1.103" value="c0a80167"/>
    <field name="ip.dst_host" showname="Destination Host: 192.168.1.103" hide="yes" size="4" pos="30" show="192.168.1.103" value="c0a80167"/>
    <field name="ip.host" showname="Source or Destination Host: 192.168.1.103" hide="yes" size="4" pos="30" show="192.168.1.103" value="c0a80167"/>
  </proto>

</packet>

</pdml>
4

1 回答 1

0

您是在 start 事件中查询子元素的数量,而此时元素的内容尚未完全解析。如果修改您的代码,只在 end 事件中查看子元素的数量,就会得到您期望的输出:

import xml.etree.ElementTree as ET

# Print the number of direct children of every <proto name="ip"> element,
# first using a full parse() and then using an incremental iterparse() that
# only reports 'end' events.
print('Parse')
tree = ET.parse('data.xml')
pdml = tree.getroot()
for packet in pdml:
    for proto in packet:
        if proto.get('name') == 'ip':
            # len(element) is the number of direct children; it replaces
            # Element.getchildren(), which was removed in Python 3.9.
            print(len(proto))

print('Iterparse')
# Only 'end' events are requested: an element is reported once its closing
# tag has been parsed, so all of its children are attached by then.
for event, elem in ET.iterparse('data.xml', events=('end',)):
    if elem.get('name') == 'ip':
        print(len(elem))

输出:

Parse
18
18
Iterparse
18
18
于 2017-08-20T01:47:36.697 回答