2

我有一些 zip 文件（700 多个），其中的文件具有以下结构（文件内容完全像这样）：

<?xml version="1.0" encoding="UTF-8"?>
<Values version="2.0">
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">false</value>
    <value name="retrievalSuspended">false</value>
</record>
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">false</value>
    <value name="retrievalSuspended">false</value>
</record>
</Values>

我想要实现的是：无论字段 processingSuspended 和 retrievalSuspended 第一次出现时的值是真还是假，都将其替换为假，但仅限于第一次出现。

编辑:

应要求，我补充一下目前已有的代码。用它我可以取到想要的字段，但我相信有一种更简单的方法可以做到这一点：

import os
import zipfile
import glob
import time
import re

def main():
    """Print the suspension states found in broker-trigger ``node.ndf`` files.

    Scans every ``*.zip`` archive in the current working directory. For each
    archive member whose name contains ``node.ndf`` and whose text mentions
    ``broker-trigger``, every ``processingSuspended`` value is paired with the
    ``retrievalSuspended`` value that follows it. All pairs are collected into
    one list of dicts (keys ``processingSuspended`` and ``retrievalSuspended``)
    which is printed once at the end.

    Nothing is written back to the archives; this only reports the values.
    """
    # Group 2 captures the element text, e.g. ``false`` in
    # ``<value name="x">false</value>``. Compiled once instead of per line.
    value_re = re.compile(r'(.*>)(.*?)(<.*)', re.M|re.I)

    rList = []
    # Sorted so the report order does not depend on directory listing order.
    for z in sorted(glob.glob("*.zip")):
        # The context manager closes the archive even if a member is bad.
        with zipfile.ZipFile(z) as archive:
            for filename in archive.namelist():
                if "node.ndf" not in filename:
                    continue

                text = archive.read(filename)
                # Python 3 returns bytes here; Python 2 already returns str.
                if not isinstance(text, str):
                    text = text.decode("utf-8")

                # Scan the member once if it is a broker trigger at all,
                # rather than re-scanning it for every matching line (which
                # duplicated every result).
                if "broker-trigger" not in text:
                    continue

                # Reset per member so a value can never leak in from a
                # previous file or record.
                processing = None
                for line in text.splitlines():
                    if "processingSuspended" in line:
                        processing = value_re.search(line)
                    if "retrievalSuspended" in line:
                        retrieval = value_re.search(line)
                        # Skip incomplete pairs instead of crashing on an
                        # unbound name or on ``None.group``.
                        if processing is None or retrieval is None:
                            continue
                        rList.append({
                            'processingSuspended': processing.group(2),
                            'retrievalSuspended': retrieval.group(2),
                        })
                        processing = None
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(rList)


if __name__ == "__main__":
    main()

提前致谢。

4

2 回答 2

1

尝试使用lxml

>>> xml = '''\
<?xml version="1.0" encoding="UTF-8"?>
<Values version="2.0">
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">true</value>
    <value name="retrievalSuspended">true</value>
</record>
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">true</value>
    <value name="retrievalSuspended">true</value>
</record>
</Values>\
'''

>>> from lxml import etree
>>> tree = etree.fromstring(xml)
>>> tree.xpath('//value[@name="processingSuspended"]')[0].text = 'false'
>>> tree.xpath('//value[@name="retrievalSuspended"]')[0].text = 'false'

xpath 表达式 '//value[@name="processingSuspended"]' 查找 name 属性等于 "processingSuspended" 的所有 value 标记。然后我们用 [0] 只取第一个，并将该标签的文本更改为 'false'。

输出:

>>> print(etree.tostring(tree, pretty_print=True))
<Values version="2.0">
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">false</value>
    <value name="retrievalSuspended">false</value>
</record>
<record name="trigger">
    <value name="uniqueId">6xjUCpDlrTVHRsEVmxx0Ews6ni8=</value>
    <value name="processingSuspended">true</value>
    <value name="retrievalSuspended">true</value>
</record>
</Values>

>>> 
于 2013-07-10T18:46:04.717 回答
0

您可以读取 zip 存档，并使用 Python 的内置模块 xml.etree.ElementTree 更新其中所含文件的 xml 格式数据。该模块的文档中甚至还有一个教程。

import glob
import xml.etree.ElementTree as ET
import zipfile

def _as_text(data):
    """Return *data* as ``str``, decoding UTF-8 bytes when necessary."""
    # On Python 2 ``str`` is already the byte-string type, so data passes
    # through untouched; on Python 3 the bytes are decoded for display.
    return data if isinstance(data, str) else data.decode("utf-8")


def main():
    """Show each zipped XML file with its first suspension flags set to false.

    For every ``*.zip`` archive in the current working directory, each member
    is printed, parsed as XML, and -- if its root is ``<Values version="2.0">``
    -- the first ``processingSuspended`` and the first ``retrievalSuspended``
    ``<value>`` elements (in document order) get the text ``false``. The
    updated XML is then printed.

    Members that are not supported, or that lack one of the two fields, are
    reported and skipped; the remaining members are still processed. The
    archives themselves are never modified.
    """
    for z in glob.glob("*.zip"):
        print('processing file: {!r}'.format(z))
        # The context manager closes the archive even if a member fails.
        with zipfile.ZipFile(z) as zfile:
            for filename in zfile.namelist():
                print('processing archive member: {!r} in {}'.format(filename, z))
                # ``read`` leaves no open member handle behind.
                contents = zfile.read(filename)

                print('Before changes:')
                print(_as_text(contents))

                try:
                    root = ET.fromstring(contents)
                except ET.ParseError:
                    # Not XML at all: report it like any other unsupported
                    # member instead of aborting the whole run.
                    print('unsupported xml file')
                    continue

                # ``get`` avoids a KeyError when the attribute is absent.
                if root.tag != "Values" or root.attrib.get("version") != "2.0":
                    print('unsupported xml file')
                    continue

                for field in ("processingSuspended", "retrievalSuspended"):
                    # Look the field up by name rather than by child position,
                    # so reordered or missing siblings cannot raise IndexError.
                    node = root.find('.//value[@name="{}"]'.format(field))
                    if node is None:
                        print('expected "{}" value field not found'.format(field))
                        break
                    node.text = "false"
                else:
                    # Reached only when both fields were found and updated.
                    print('After changes:')
                    updated_contents = _as_text(ET.tostring(root))
                    print(updated_contents)


if __name__ == "__main__":
    main()
于 2013-07-11T15:18:46.173 回答