您可以试试下面这种写法。
from BeautifulSoup import BeautifulSoup
try:
    _TEXT_TYPE = unicode  # Python 2: the text type tested by the original code
except NameError:
    _TEXT_TYPE = str  # Python 3: ``unicode`` no longer exists, ``str`` is text

# (entity, replacement) pairs, applied in this order to every text node.
# The search strings must stay in their escaped entity form; if they are
# decoded, each replace() turns into a no-op (or, for the apostrophe, into
# a syntax error).
_ENTITY_REPLACEMENTS = (
    ('&#39;', "'"),
    ('&quot;', '"'),
    ('&nbsp;', ' '),
    ('&gt;', '>'),
    ('&lt;', '<'),
)


def getPrintUnicode(soup):
    """Return the concatenated text of ``soup`` with common entities decoded.

    ``soup`` is either a text node (an instance of the text type) or any
    object exposing a ``contents`` sequence of child nodes. Text nodes have
    the entities ``&#39;``, ``&quot;``, ``&nbsp;``, ``&gt;`` and ``&lt;``
    replaced by the characters they stand for. Other nodes are walked
    recursively and the text of their children is joined in order.

    Returns an empty string when a node has no children.

    Raises ``AttributeError`` if ``soup`` is neither text nor an object with
    a ``contents`` attribute.
    """
    if isinstance(soup, _TEXT_TYPE):
        for entity, char in _ENTITY_REPLACEMENTS:
            soup = soup.replace(entity, char)
        return soup
    # ``or ()`` keeps the original behaviour of returning '' for an empty or
    # missing child list.
    return ''.join(getPrintUnicode(child) for child in (soup.contents or ()))
# Demo: parse a two-cell table fragment and print its flattened text.
_cell_markup = '<td class="ln">15</td><td class="sf3b2"><code> </code></td>'
print(getPrintUnicode(BeautifulSoup(_cell_markup)))
您可以对整个页面的 soup 对象调用这个 getPrintUnicode() 函数,它会返回页面的完整文本内容。如果需要把得到的字符串转换为整数,请配合异常处理(try/except)来应对转换失败的情况。例如:
# Demo: the flattened text of the same fragment is passed to int() and printed.
_numeric_markup = '<td class="ln">15</td><td class="sf3b2"><code> </code></td>'
_numeric_text = getPrintUnicode(BeautifulSoup(_numeric_markup))
print(int(_numeric_text))