0

我有如下所示的两个 XML 文件。如何用脚本将这两个文件合并,使结果取 new.xml 中的值,同时保留仅存在于 base.xml 中的属性值?

base.xml

 <element name="ind"
          dbs="name1, name2, name4"
          server="ServerName"
          good-attribute="234"/>  

我的new.xml看起来像这样:

<element name="ind"
         description="My desc"
         dbId="someId"
         moreAttr="someVal"
         dbs="name1, name2, name4, name12, name3"
         server="ServerName" />

我想把 new.xml 中 dbs 的最新值合并到 merge.xml 中,并保留仅存在于 base.xml 中的 good-attribute 的值:

merge.xml

<element name="ind"
         description="My desc"
         dbId="someId"
         moreAttr="someVal"
         dbs="name1, name2, name4, name12, name3"
         server="ServerName"
         good-attribute="234" />

使用 paste 只是按字段逐列拼接,而 vimdiff 只能显示差异,无法选择要保留的内容。是否有内置的工具支持这种合并,还是需要用 sed + awk 做替换来实现?

4

1 回答 1

1

这是Python中的直接实现:

#!/usr/bin/env python
# Python 2.5+ compatible
import sys
import xml.etree.ElementTree as etree

def main():
    """Merge the XML files named on the command line and write the result.

    Every command-line argument is parsed with xml2dict, the parsed
    mappings are passed positionally to merge_dicts, and the merged
    mapping is serialized with dict2xml.
    """
    # Use the underlying binary stream when sys.stdout exposes one;
    # otherwise write to sys.stdout itself.
    output_file = getattr(sys.stdout, 'buffer', sys.stdout)
    parsed_inputs = [xml2dict(path) for path in sys.argv[1:]]
    merged = merge_dicts(*parsed_inputs)
    dict2xml(merged, output_file)

def xml2dict(source):
    """Parse an XML document and index its <element> nodes by name.

    source -- a filename or file object, as accepted by etree.parse().

    Returns a dict mapping the value of each <element> node's 'name'
    attribute to that node's attribute dictionary.  If several nodes
    share a name, the last one in document order wins.
    """
    tree = etree.parse(source)
    # ElementTree.getiterator() was removed in Python 3.9.  iter() is its
    # replacement, but very old ElementTree versions only provide
    # getiterator(), so fall back to it when iter() is missing.
    try:
        find_all = tree.iter
    except AttributeError:
        find_all = tree.getiterator
    return dict((el.get('name'), el.attrib) for el in find_all('element'))

def merge_dicts(base, new):
    """Combine per-element attribute dicts, preferring values from new.

    base -- dict mapping element name -> attribute dict (older values)
    new  -- dict mapping element name -> attribute dict (newer values)

    Returns a new dict with one entry per element name found in new.
    Each entry starts from the matching base attributes (if any) and is
    then overridden by the attributes from new.  Elements that appear
    only in base are not included in the result.

    Neither input mapping nor any attribute dict inside them is modified.
    """
    merged = {}
    # for each element from new xml
    for name, attr in new.items():
        # Copy the base attributes so the caller's dict is not mutated,
        # then let the new values take precedence.
        combined = dict(base.get(name, {}))
        combined.update(attr)
        merged[name] = combined
    return merged

def dict2xml(d, sink):
    """Serialize a mapping of attribute dicts as XML and write it to sink.

    d    -- dict whose values are attribute dicts; the keys are not used
    sink -- filename or writable file object passed to ElementTree.write()

    The output is a single <root> element holding one <element> child per
    value of d, each carrying that value's attributes, encoded as UTF-8.
    """
    document_root = etree.Element('root')
    for attributes in d.values():
        etree.SubElement(document_root, 'element', attributes)
    tree = etree.ElementTree(document_root)
    tree.write(sink, encoding='utf-8')

# Run the merge only when executed as a script, so that importing this
# file (e.g. to reuse its functions) does not read sys.argv or write output.
if __name__ == '__main__':
    main()

将此代码保存为 merge-xml 文件,并运行 chmod +x merge-xml。然后:

$ ./merge-xml base.xml new.xml >merge.xml

这是 Python 2.4+ 兼容版本:

#!/usr/bin/env python
import sys
from xml.dom import minidom

def main():
    """Merge the XML files named on the command line and write the result.

    Every command-line argument is parsed with xml2dict, the parsed
    mappings are passed positionally to merge_dicts, and the merged
    mapping is serialized with dict2xml.
    """
    # Use the underlying binary stream when sys.stdout exposes one;
    # otherwise write to sys.stdout itself.
    output_file = getattr(sys.stdout, 'buffer', sys.stdout)
    parsed_inputs = [xml2dict(path) for path in sys.argv[1:]]
    merged = merge_dicts(*parsed_inputs)
    dict2xml(merged, output_file)

def xml2dict(source):
    """Parse an XML document and index its <element> nodes by name.

    source -- a filename or file object, as accepted by minidom.parse().

    Returns a dict mapping the value of each <element> node's 'name'
    attribute to a plain dict of that node's attributes (built by
    attr2dict).  If several nodes share a name, the last one wins.
    """
    by_name = {}
    for node in minidom.parse(source).getElementsByTagName('element'):
        by_name[node.getAttribute('name')] = attr2dict(node.attributes)
    return by_name

def attr2dict(nodemap):
    """Convert a DOM attribute node map into a plain {name: value} dict.

    nodemap -- object exposing .length and .item(i), where each item has
               .name and .value (e.g. the .attributes of a minidom element)
    """
    nodes = [nodemap.item(index) for index in range(nodemap.length)]
    return dict((node.name, node.value) for node in nodes)

def merge_dicts(base, new):
    """Combine per-element attribute dicts, preferring values from new.

    base -- dict mapping element name -> attribute dict (older values)
    new  -- dict mapping element name -> attribute dict (newer values)

    Returns a new dict with one entry per element name found in new.
    Each entry starts from the matching base attributes (if any) and is
    then overridden by the attributes from new.  Elements that appear
    only in base are not included in the result.

    Neither input mapping nor any attribute dict inside them is modified.
    """
    merged = {}
    # for each element from new xml
    for name, attr in new.items():
        # Copy the base attributes so the caller's dict is not mutated,
        # then let the new values take precedence.
        combined = dict(base.get(name, {}))
        combined.update(attr)
        merged[name] = combined
    return merged

def dict2xml(d, sink):
    """Serialize a mapping of attribute dicts as XML and write it to sink.

    d    -- dict whose values are attribute dicts; the keys are not used
    sink -- object with a write() method that receives the encoded document

    The output is a pretty-printed, UTF-8 encoded document whose <root>
    element holds one <element> child per value of d, each carrying that
    value's attributes.
    """
    dom_impl = minidom.getDOMImplementation()
    document = dom_impl.createDocument(None, "root", None)
    for attributes in d.values():
        node = document.createElement('element')
        for key in attributes:
            node.setAttribute(key, attributes[key])
        document.documentElement.appendChild(node)
    sink.write(document.toprettyxml(encoding='utf-8'))

# Run the merge only when executed as a script, so that importing this
# file (e.g. to reuse its functions) does not read sys.argv or write output.
if __name__ == '__main__':
    main()
于 2013-11-15T03:51:55.457 回答