我想获取一批网页的纯文本：这些网页的 URL 存储在数据库的一张表中，我要把提取到的文本更新回同一张表。但出于某种原因，在打开大约 800 个链接后，程序就结束了。以下是部分代码：
# Script fragment: selects (id, url) rows from the `news` table, opens each
# URL, and writes a title and plain text back to the same row with an UPDATE.
# NOTE(review): indentation was lost in this paste and the `.........` lines
# appear to stand for code the author omitted, so the exact loop nesting
# cannot be confirmed from what is shown here.
# Connect to the "noticias" database on localhost via the low-level `_mysql`
# module (user/passwd are blank here -- presumably redacted).
db=_mysql.connect("localhost",user="",passwd="",db="noticias")
# Only rows with id > 17821 are selected.
db.query("""select id,url from news where id>17821""")
results = db.store_result()
# The row count of the result drives the loop below.
numrows = results.num_rows()
for i in range(0,numrows):
# `row` is indexed as row[0][column] below: row[0][1] is the url and
# row[0][0] is the id (used when building the UPDATE).
row = results.fetch_row()
link = row[0][1] # URL to open
# Retry loop: urlopen is called again until it succeeds (`break`).
# NOTE(review): the handler body is omitted; if it does not bound the number
# of retries or skip the link, a permanently failing URL would loop forever.
# NOTE(review): a bare `except:` also catches KeyboardInterrupt/SystemExit;
# consider catching the specific URL/socket errors instead.
while True:
try:
SourceCode = urlopen(link)
break
except:
.........
# NOTE(review): the return value of read() is not assigned on this line;
# presumably the real code keeps it for the extraction step -- confirm.
SourceCode.read()
SourceCode.close()
# Omitted code; presumably this is where `title` and `plaintext` are derived.
.........
title = str(title)
plaintext = str(plaintext)
# NOTE(review): the statement is built by string concatenation with the
# values wrapped in single quotes. A quote character inside `title` or
# `plaintext` would produce an invalid statement (and scraped page text is
# untrusted input, so this is also an SQL-injection risk). Escape the values
# (e.g. with the connection's escape_string) or use MySQLdb's cursor API with
# query parameters.
# NOTE(review): `+row[0][0]` only works if the id value is a string -- confirm.
query = "UPDATE news SET title = '"+title+"',plaintext ='"+plaintext+"' WHERE id ="+row[0][0]
# NOTE(review): `break` needs an enclosing loop; which loop it exits cannot be
# determined from this flattened fragment.
# NOTE(review): on ANY exception the connection is closed and the error is
# discarded. Unless the connection is reopened in code not shown, later
# db.query() calls on the closed connection would fail -- a plausible reason
# for the run stopping early. Log the exception before deciding what to do.
try:
db.query(query)
break
except:
db.close()