我需要对这个 HTML 页面 http://gnats.netbsd.org/summary/year/2012-perf.html 进行分类,并从大表中列出最重要的问题。这是我用 Python 写的代码。如果您能给我一些建议,我将非常感激。
import urllib.request
from bs4 import BeautifulSoup
# Page holding the summary table that this script reads.
PAGE_URL = "http://gnats.netbsd.org/summary/year/2012-perf.html"


def _cell_texts(row):
    """Return the stripped text of every ``<td>`` cell inside a ``<tr>`` tag."""
    return [cell.get_text(strip=True) for cell in row.find_all("td")]


def combine_counts(table_rows):
    """Combine the two count columns of each table row into one total.

    Args:
        table_rows: Iterable of rows, each a sequence of cell texts.  The
            first cell is used as the row name and the second and third
            cells as the two counts to add together (open and closed
            issues).
            NOTE(review): this column order is taken from the original
            script's intent -- confirm it against the live page.

    Returns:
        A list of ``(name, total)`` tuples in the same order as the input
        rows.  Rows with fewer than three cells, or whose second or third
        cell is not an integer (for example header or separator rows), are
        skipped.
    """
    combined = []
    for cells in table_rows:
        if len(cells) < 3:
            continue
        try:
            total = int(cells[1]) + int(cells[2])
        except ValueError:
            # Not a data row; there are no numeric counts to add up.
            continue
        combined.append((cells[0], total))
    return combined


def main():
    """Download the summary page and print one ``name|total`` line per row."""
    # The context manager closes the HTTP response even if reading fails.
    with urllib.request.urlopen(PAGE_URL) as response:
        page = response.read()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(page, "html.parser")

    rows = [_cell_texts(tr) for tr in soup.find_all("tr")]
    for name, total in combine_counts(rows):
        print(name + '|' + str(total))


if __name__ == "__main__":
    main()