import xml.dom.minidom
# Sample sitemap document used as input for the parsing demo below.
content = """
<urlset xmlns="http://www.google.com/schemas/sitemap/0.90">
<url>
<loc>http://www.domain.com/</loc>
<lastmod>2011-01-27T23:55:42+01:00</lastmod>
<changefreq>daily</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>http://www.domain.com/page1.html</loc>
<lastmod>2011-01-26T17:24:27+01:00</lastmod>
<changefreq>daily</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>http://www.domain.com/page2.html</loc>
<lastmod>2011-01-26T15:35:07+01:00</lastmod>
<changefreq>daily</changefreq>
<priority>0.5</priority>
</url>
</urlset>
"""


def child_text(parent, tag, default=""):
    """Return the text of the first ``<tag>`` element found under *parent*.

    Wraps the verbose ``getElementsByTagName(tag)[0].childNodes[0].nodeValue``
    chain so that each field lookup is a single call.

    Args:
        parent: A minidom ``Element`` (or ``Document``) to search beneath.
        tag: Tag name of the descendant element to read.
        default: Value returned when no matching element exists.

    Returns:
        The concatenated data of the element's direct text/CDATA children
        (``""`` for an empty element), or *default* if the element is
        missing.
    """
    matches = parent.getElementsByTagName(tag)
    if not matches:
        return default
    # Join every text chunk instead of reading childNodes[0] only, so empty
    # elements and content split across several nodes do not raise or truncate.
    return "".join(
        node.data
        for node in matches[0].childNodes
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
    )


# Named `doc` rather than `xml` so the imported `xml` package is not shadowed.
doc = xml.dom.minidom.parseString(content)
urlset = doc.getElementsByTagName("urlset")[0]
url = urlset.getElementsByTagName("url")
for entry in url:
    loc = child_text(entry, "loc")
    lastmod = child_text(entry, "lastmod")
    changefreq = child_text(entry, "changefreq")
    priority = child_text(entry, "priority")
    # A single parenthesised argument keeps this valid on Python 2 and 3.
    print("%s, %s, %s, %s" % (loc, lastmod, changefreq, priority))
有没有更简单的方法来获取节点的值?例如,下面这一行就显得过于冗长:
loc = url[i].getElementsByTagName("loc")[0].childNodes[0].nodeValue