所以我的 CSV_Output 文件是空的,尽管我没有收到任何错误。我试图从我的 CSV_to_Read 文件中再添加一列。article.cleaned_text 的打印有效。所以我觉得我只是在这里做一些愚蠢的事情。谢谢!
from csv import reader, writer
import unicodecsv as csv
from goose import Goose
with open('CSV_to_Read.csv','r') as csvfile:
readCSV = csv.reader(csvfile, encoding='utf-8')
out = writer(open("CSV_Output.csv", "a"))
for row in readCSV:
g = Goose({'browser_user_agent': 'Mozilla', 'parser_class':'soup'})
try:
article = g.extract(url=row[0])
print article.cleaned_text
out.writerow([row[0], row[1], row[2], row[3], row[4], row[5], row[6], article.cleaned_text, row[7], row[8], row[9]])
except Exception:
pass