python - Python eTree Parser 没有附加元素

Question

查看我的日志，看看它是如何说我从 Postgres 返回的行已从字符串转换为元素（我打印字符串，打印元素，打印 isElement 布尔值！）但是当我尝试附加它，错误是它不是一个元素。呼，呼。

import sys
from HTMLParser import HTMLParser
from xml.etree import cElementTree as etree
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement, tostring
import psycopg2
import psycopg2.extras

def main():
    # Connect to an existing database
    conn = psycopg2.connect(dbname="**", user="**", password="**", host="/tmp/", port="**")

    # Open a cursor to perform database operations
    cur = conn.cursor(cursor_factory = psycopg2.extras.RealDictCursor)

    cur.execute("SELECT * FROM landingpagedata;")
    rows = cur.fetchall()

    class LinksParser(HTMLParser):
      def __init__(self):
          HTMLParser.__init__(self)
          self.tb = etree.TreeBuilder()

      def handle_starttag(self, tag, attributes):
          self.tb.start(tag, dict(attributes))

      def handle_endtag(self, tag):
          self.tb.end(tag)

      def handle_data(self, data):
          self.tb.data(data)

      def close(self):
          HTMLParser.close(self)
          return self.tb.close()

    template = 'template.html'



    # parser.feed(open('landingIndex.html').read()) #for testing
    # root = parser.close()

    for row in rows:
        parser = LinksParser()

        parser.feed(open(template).read())
        root = parser.close()




        #title
        title = root.find(".//title")
        title.text = row['title']

        #headline
        h1_id_headline = root.find(".//h1")
        h1_id_headline.text = row['h1_id_headline']
        # print row['h1_id_headline']

        #intro
        p_class_intro = root.find(".//p[@class='intro']")
        p_class_intro.text = row['p_class_intro']
        # print row['p_class_intro']

这就是问题发生的地方！

        #recommended
        p_class_recommendedbackground = root.find(".//div[@class='recommended_background_div']")
        print p_class_recommendedbackground
        p_class_recommendedbackground.clear()
        newElement = ET.fromstring(row['p_class_recommendedbackground'])
        print row['p_class_recommendedbackground']
        print ET.iselement(newElement)
        p_class_recommendedbackground.append(newElement)

        html = tostring(root)
        f = open(row['page_name'], 'w').close()
        f = open(row['page_name'], 'w')
        f.write(html)
        f.close()
        # f = ''
        # html = ''
        parser.reset()
        root = ''

    # Close communication with the database
    cur.close()
    conn.close()

if __name__ == "__main__":
  main()

我的日志是这样的：

{background: url(/images/courses/azRealEstate.png) center no-repeat;}
<Element 'div' at 0x10a999720>
<p class="recommended_background">Materials are are aimed to all aspiring real estate sales associates who wish to obtain the Arizona Real Estate Salesperson license, which is provided by the <a href="http://www.re.state.az.us/" style="text-decoration: underline;">Arizona Department of Real Estate</a>.</p>
True
Traceback (most recent call last):
  File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 108, in <module> main()
  File "/Users/Morgan13/Programming/LandingPageBuilder/landingPages/landingBuilderTest.py", line 84, in main
    p_class_recommendedbackground.append(newElement)
TypeError: must be Element, not Element
[Finished in 0.1s with exit code 1]

score 1 · Accepted Answer

我可以通过这种方式重现错误消息：

from xml.etree import cElementTree as etree
import xml.etree.ElementTree as ET

croot = etree.Element('root')
child = ET.Element('child')
croot.append(child)
# TypeError: must be Element, not Element

问题的根本原因是我们将的cElementTree实现ElementTree与xml.etree.ElementTree. ElementTree永远不要让双胞胎相遇。

所以解决方法就是选择一个，比如说etree，替换所有出现的另一个（例如用替换ET）etree。

python - Python eTree Parser 没有附加元素

1 回答 1

Related

Reference