我正在尝试将以下代码的输出写入 csv 文件。数据被覆盖。所以最后我只能在输出文件中看到从网站上抓取的最后一个数据。
from bs4 import BeautifulSoup
import urllib2
import csv
import re
import requests
for i in xrange(3179,7000):
try:
page = urllib2.urlopen("http://bvet.bytix.com/plus/trainer/default.aspx?id={}".format(i))
except:
continue
else:
soup = BeautifulSoup(page.read())
for eachuniversity in soup.findAll('fieldset',{'id':'ctl00_step2'}):
data = i, re.sub(r'\s+',' ',''.join(eachuniversity.findAll(text=True)).encode('utf-8')),'\n'
print data
myfile = open("ttt.csv", 'wb')
wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
wr.writerow(data)
我是新来的。我不知道我错在哪里。
更新
from bs4 import BeautifulSoup
import urllib2
import csv
import re
import requests
# Corrected version: the file and csv.writer are created once, outside the
# scraping loop, so every row is appended to the same open handle instead of
# truncating the file on each iteration.
with open("BBB.csv", 'wb') as myfile:
    writer = csv.writer(myfile, quoting=csv.QUOTE_ALL)
    for i in xrange(3179, 7000):
        try:
            page = urllib2.urlopen(
                "http://bvet.bytix.com/plus/trainer/default.aspx?id={}".format(i))
        except Exception:
            # Best-effort scrape: silently skip ids that fail to fetch.
            continue
        soup = BeautifulSoup(page.read())
        for eachuniversity in soup.findAll('fieldset', {'id': 'ctl00_step2'}):
            # Row layout: the id followed by every non-blank text node, with
            # all internal whitespace removed.  Regex patterns should be raw
            # strings (r'\s+'), otherwise '\s' relies on Python leaving the
            # unknown escape alone.
            data = [i] + [re.sub(r'\s+', '', text).encode('utf8')
                          for text in eachuniversity.find_all(text=True)
                          if text.strip()]
            writer.writerow(data)