python - 在 python 中重构这个字典到 xml 的转换器

Question

这是一件小事，真的：我有这个函数可以将 dict 对象转换为 xml。

这是这个函数：

def dictToXml(d):
    """Render a nested dict as indented XML text.

    Every key becomes an element named after it.  A dict value becomes a
    nested element, a list value becomes one sibling element per item (all
    sharing the key's name), and any other value becomes escaped element
    text, with ``None`` rendered as empty text.  Each nesting level is
    indented by four spaces.

    Returns the XML as a text string terminated by a newline (an empty
    string for an empty dict).
    """
    from xml.sax.saxutils import escape

    # ``unicode`` only exists on Python 2; on Python 3 ``str`` is the text type.
    try:
        text_type = unicode
    except NameError:
        text_type = str

    def unicodify(o):
        # None is rendered as empty element text rather than the string "None".
        if o is None:
            return u''
        return text_type(o)

    lines = []
    def addDict(node, offset):
        # ``items()`` works on Python 2 and 3; ``iteritems()`` is Python 2 only.
        for name, value in node.items():
            if isinstance(value, dict):
                lines.append(offset + u"<%s>" % name)
                addDict(value, offset + u" " * 4)
                lines.append(offset + u"</%s>" % name)
            elif isinstance(value, list):
                # A list repeats the key's element once per item.
                for item in value:
                    if isinstance(item, dict):
                        lines.append(offset + u"<%s>" % name)
                        addDict(item, offset + u" " * 4)
                        lines.append(offset + u"</%s>" % name)
                    else:
                        lines.append(offset + u"<%s>%s</%s>" % (name, escape(unicodify(item)), name))
            else:
                lines.append(offset + u"<%s>%s</%s>" % (name, escape(unicodify(value)), name))

    addDict(d, u"")
    # The empty last entry makes the joined text end with a newline.
    lines.append(u"")
    return u"\n".join(lines)

例如，它转换这个字典

{ 'site': { 'name': 'stackoverflow', 'blogger': [ 'Jeff', 'Joel' ] } }

到：

<site>
    <name>stackoverflow</name>
    <blogger>Jeff</blogger>
    <blogger>Joel</blogger>
</site>

它可以工作，但 addDict 函数看起来有点过于重复。我确信有办法把它重构为 3 个相互递归的函数，分别命名为 addDict、addList 和 addElse，但我的大脑卡住了。有什么建议吗？

此外，如果有办法去掉每一行里重复出现的 offset +，那就更好了。

注意：我选择这些语义是因为我试图匹配org.json中的 json-to-xml 转换器的行为，我在项目的不同部分中使用了它。如果您只是为了寻找字典到 xml 转换器而进入此页面，那么在某些答案中有一些非常好的选择。（特别是pyfo）。

score 9 · Accepted Answer

>>> from pyfo import pyfo
>>> d = ('site', { 'name': 'stackoverflow', 'blogger': [ 'Jeff', 'Joel' ] } )
>>> result = pyfo(d, pretty=True, prolog=True, encoding='ascii')
>>> print result.encode('ascii', 'xmlcharrefreplace')
<?xml version="1.0" encoding="ascii"?>
<site>
  <blogger>
    Jeff
    Joel
  </blogger>
  <name>stackoverflow</name>
</site>

要安装pyfo：

$ easy_install pyfo

score 4 · Accepted Answer

我注意到您在添加项目方面有共同点。使用这种共性，我会重构将一个项目添加到一个单独的函数中。

def addItem(item, name, offset):
    """Append the XML line(s) for a single value stored under ``name``.

    A dict is written as an opening tag, its contents (via ``addDict``,
    indented four more spaces) and a closing tag.  Any other value is
    written as one ``<name>text</name>`` line with the text escaped.
    """
    if not isinstance(item, dict):
        # Plain value: a single self-contained line.
        lines.append(offset + u"<%s>%s</%s>" % (name, escape(unicodify(item)), name))
        return

    deeper = offset + u" " * 4
    lines.append(offset + u"<%s>" % name)
    addDict(item, deeper)
    lines.append(offset + u"</%s>" % name)

def addList(value,name, offset):
    """Emit one ``name`` element for every entry of the list ``value``."""
    for entry in value:
        addItem(entry, name, offset)

def addDict(node, offset):
    """Emit the elements for every key/value pair of the dict ``node``.

    List values are handed to ``addList`` (one element per entry); every
    other value is handed to ``addItem``.  ``offset`` is the indentation
    string put in front of each emitted line.
    """
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for name, value in node.items():
        if isinstance(value, list):
            addList(value, name, offset)
        else:
            addItem(value, name, offset)

咨询警告：此代码未经实际使用 Python 的任何人测试或编写。

score 1 · Accepted Answer

摆脱重复的“偏移+”：

offset = 0
def addLine(str):
    """Append ``str`` to ``lines``, indented four spaces per ``offset`` level.

    Both ``lines`` and ``offset`` are read from the enclosing scope.
    """
    # NOTE: the parameter name shadows the builtin ``str``; it is kept so the
    # signature stays unchanged.
    lines.append(u" " * (offset * 4) + str)

然后

...
    addLine(u"<%s>" % name)
    offset = offset + 1
    addDict(value)
    offset = offset - 1
    addLine(u"</%s>" % name)

我手边没有解释器，所以请谨慎对待 :(

score 1 · Accepted Answer

您的原始代码会生成格式错误的 XML，并且可以为两个不同的字典生成相同的 XML（不是单射的，从数学上讲）。

例如，如果您有一个列表作为字典中唯一键的值：

 d = { 'list': [1,2,3] }

我预计你的代码会产生

 <list>1</list><list>2</list><list>3</list>

并且没有根元素。任何 XML 都应该只有一个根元素。

其次，仅凭您的代码生成的 XML，无法判断下面这段 XML

 <tag>1</tag>

是由 { 'tag': 1 } 还是由 { 'tag': [1] } 生成的。

所以，我建议

总是从根元素开始
用两个特殊标签（例如<list/>和<item/>）来表示列表，或者通过属性把元素标记为列表

然后，在对这些概念上的缺陷做出决定之后，我们可以生成正确且明确的 XML。我选择使用属性来标记列表，并使用 ElementTree 自动构建 XML 树。此外，递归有助于（add_value_to_xml递归调用）：

from xml.etree.ElementTree import Element, SubElement, tostring

# Types treated as scalars by is_scalar.  ``basestring`` and ``long`` exist
# only on Python 2; on Python 3 ``str`` and ``int`` already cover them.
try:
    _SCALAR_TYPES = (basestring, float, int, long, bool)
except NameError:
    _SCALAR_TYPES = (str, float, int, bool)


def is_scalar(v):
    """Return True if ``v`` is a string, float, integer or bool."""
    return isinstance(v, _SCALAR_TYPES)

# ``unicode`` exists only on Python 2; Python 3's ``str`` is already text.
try:
    _text_type = unicode
except NameError:
    _text_type = str


def add_value_to_xml(root,v):
    """Populate the element ``root`` from the Python value ``v``.

    * dict: one child element per key, each filled in recursively.
    * list: ``root`` gets ``type="list"`` and one child per entry; each
      child reuses ``root``'s tag and is marked ``type="item"``.
    * scalar (as decided by ``is_scalar``): stored as ``root``'s text.

    Returns ``root``.  Raises ``TypeError`` for any other kind of value.
    """
    if isinstance(v, dict):
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for k, kv in v.items():
            add_value_to_xml(SubElement(root, _text_type(k)), kv)
    elif isinstance(v, list):
        root.set('type', 'list')
        for e in v:
            li = add_value_to_xml(SubElement(root, root.tag), e)
            # Set after the recursive call, so an entry that is itself a list
            # ends up tagged "item" rather than "list".
            li.set('type', 'item')
    elif is_scalar(v):
        root.text = _text_type(v)
    else:
        raise TypeError("add_value_to_xml: unsupported type (%s)" % type(v))
    return root

def dict_to_xml(d,root='dict'):
    """Create an element tagged ``root``, fill it from ``d`` and return it.

    The filling is delegated to ``add_value_to_xml``, whose return value is
    handed back to the caller.
    """
    return add_value_to_xml(Element(root), d)

# Demo input: a dict holding a float, a string, an int, a list and a
# nested dict.
d = { 'float': 5194.177, 'str': 'eggs', 'int': 42,
        'list': [1,2], 'dict': { 'recursion': True } }
# Build the element tree under the default root tag 'dict'.
x = dict_to_xml(d)
# Serialize the tree and print it (Python 2 print statement).
print tostring(x)

测试dict的转换结果为：

<dict><int>42</int><dict><recursion>True</recursion></dict><float>5194.177</float><list type="list"><list type="item">1</list><list type="item">2</list></list><str>eggs</str></dict>

score 0 · Accepted Answer

这是我的解决方案的简短草图：有一个通用函数 addSomething()，它根据值的类型分派到 addDict()、addList() 或 addElse()。这些函数再递归调用 addSomething()。

基本上，您正在分解if子句中的部分并添加递归调用。

score 0 · Accepted Answer

以下是我在使用 XML 时发现的有用信息。实际上首先创建 XML 节点结构，然后将其渲染为文本。

这将两个不相关的问题分开。

如何将我的 Python 结构转换为 XML 对象模型？
如何格式化该 XML 对象模型？

当你把这两个东西放在一个函数中时，这很难。另一方面，如果你将它们分开，那么你有两件事。首先，您有一个相当简单的函数来“遍历”您的 Python 结构并返回一个 XML 节点。您的 XML 节点可以呈现为应用了一些首选编码和格式规则的文本。

from xml.sax.saxutils import escape

class Node( object ):
    """An XML element: a tag name plus an ordered tuple of child nodes."""

    def __init__( self, name, *children ):
        self.name = name
        self.children = children

    def toXml( self, indent ):
        """Render this element as text, indented ``indent`` levels of four spaces.

        No children gives a self-closing tag; exactly one child is rendered
        inline between the tags; several children are rendered one per
        line, one level deeper, between an opening and a closing line.
        """
        pad = indent * 4 * u' '
        count = len(self.children)

        if count == 0:
            return u"%s<%s/>" % ( pad, self.name )

        if count == 1:
            # The only child sits on the same line, so it gets no indent.
            inner = self.children[0].toXml(0)
            return u"%s<%s>%s</%s>" % ( pad, self.name, inner, self.name )

        rendered = [ u"%s<%s>" % ( pad, self.name ) ]
        for kid in self.children:
            rendered.append( kid.toXml(indent + 1) )
        rendered.append( u"%s</%s>" % ( pad, self.name ) )
        return u"\n".join( rendered )

class Text( Node ):
    """Leaf node holding a value that is rendered as escaped text."""

    def __init__( self, value ):
        # Node.__init__ is not called here, so a Text carries only ``value``.
        self.value = value

    def toXml( self, indent ):
        """Return the escaped text, indented ``indent`` levels of four spaces.

        A value of ``None`` is rendered as an empty string.
        """
        # ``unicode`` exists only on Python 2; Python 3's ``str`` is text already.
        try:
            text_type = unicode
        except NameError:
            text_type = str
        text = u'' if self.value is None else text_type(self.value)
        return u"%s%s" % ( indent*4*u' ', escape( text ) )

def dictToXml(d):
    """Convert a nested dict to XML text via an intermediate node tree.

    Each key becomes a ``Node`` named after it.  A dict value supplies the
    node's children, a list value produces one sibling node per entry (all
    named after the key), and any other value becomes a single ``Text``
    child.  The top-level nodes are rendered with ``toXml(0)`` and joined
    with newlines.
    """

    def toNode(name, value):
        # A dict becomes an element with child elements; anything else is text.
        if isinstance(value, dict):
            return Node( name, *dictToNodeList( value ) )
        return Node( name, Text( value ) )

    def dictToNodeList(node):
        nodes = []
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for name, value in node.items():
            if isinstance(value, list):
                # Convert each entry itself; passing the whole list on to
                # dictToNodeList fails because a list has no key/value pairs.
                nodes.extend( toNode( name, item ) for item in value )
            else:
                nodes.append( toNode( name, value ) )
        return nodes

    return u"\n".join( [ n.toXml(0) for n in dictToNodeList(d) ] )

python - 在 python 中重构这个字典到 xml 的转换器

6 回答 6

Related

Reference