1

我目前正在使用 xml.dom.minidom 构建大型 XML 文件,然后通过 toprettyxml 将其写入文件。由于遇到了内存错误,有没有办法以流的方式把 XML 直接写入文件,而不是先在内存中构建整个文档?

def run(self):
    while True:
        domain = self.queue.get()
        try:
            conn = boto.connect_sdb(awsa, awss)
            sdbdomain = conn.get_domain(domain)
            s3conn = boto.connect_s3(awsa, awss)
            archbucket = s3conn.get_bucket("simpledbbu")
            doc = None
            doc = Document()
            root = doc.createElement("items")
            doc.appendChild(root)
            countermax = 0
            counter = 0
            for item in sdbdomain:
                node = doc.createElement("item")
                node.setAttribute("itemName", item.name)
                for k,v in item.items():
                    if not isinstance(v, basestring):
                        i = 0
                        for val in v:
                            node.setAttribute("{0}::{1}".format(k,i),val)
                            i += 1
                    else:
                        node.setAttribute(k,v)
                root.appendChild(node)
            k = Key(archbucket)
            k.key = "{0}/{1}.xml".format(datetime.date.today().strftime("%Y%m%d"),sdbdomain.name)
            #x = doc.toprettyxml(indent="  ")
            f = open(domain + ".xml", "w")
            f.truncate()
            f.write(doc.toprettyxml(indent="  "))
            f.close()
            #k.content_type.encode('ascii')
            k.set_contents_from_filename(f.name)
            os.remove(os.path.join(os.getcwd(),f.name))
        except:
            print "failed to load domain: {0}".format(domain)
            print formatExceptionInfo()
        finally:
            self.queue.task_done()
4

1 回答 1

1

使用 xml.dom.minidom 构建大型 xml 文件,然后通过 toprettyxml 将它们写入文件。

如果您的内存不足,您可能应该停止这样做。

您可以使用简单的字符串操作来构建 XML。

from xml.sax.saxutils import quoteattr

# Stream the XML straight to disk: only the attributes of the item currently
# being written are held in memory, never a DOM for the whole domain.
filename = domain + ".xml"
with open(filename, "w") as f:
    f.write('<?xml version="1.0" ?>\n')
    f.write("<items>\n")
    for item in sdbdomain:
        # quoteattr() escapes &, <, > and quotes and adds the surrounding
        # quote characters, so arbitrary values cannot break the markup.
        attrs = ["itemName={0}".format(quoteattr(item.name))]
        for name, value in item.items():
            if isinstance(value, basestring):
                attrs.append("{0}={1}".format(name, quoteattr(value)))
            else:
                # Multi-valued attribute: emit every value, each suffixed
                # with its index ("name::0", "name::1", ...).
                for i, val in enumerate(value):
                    attrs.append(
                        "{0}::{1}={2}".format(name, i, quoteattr(val)))
        f.write("  <item {0}/>\n".format(" ".join(attrs)))
    f.write("</items>\n")

# Upload only after the with-block has closed (and flushed) the file.
s3key = Key(archbucket)
s3key.key = "{0}/{1}.xml".format(
    datetime.date.today().strftime("%Y%m%d"), sdbdomain.name)
s3key.set_contents_from_filename(filename)

类似的东西应该允许您将 XML 写入临时文件,而无需在内存中创建大型 DOM 对象。

于 2011-07-12T21:04:59.503 回答