from newspaper import Article
import pdb
from unidecode import unidecode
def get_article_newspaper(url):
    """Download the page at ``url``, extract its article text, and print it.

    The page is parsed with newspaper's Chinese language setting. The
    extracted text is passed through ``unidecode`` and every occurrence of
    the literal 'Image caption' is removed before printing.

    Args:
        url: Address of the article page to fetch.

    Returns:
        The cleaned text that was printed (an empty string when the
        extractor found no article body).
    """
    # newspaper selects its language-specific extraction through the
    # ``language`` keyword. The previous ``en='zh'`` is not a documented
    # option, so the Chinese setting was never applied.
    article = Article(url, language='zh')  # Chinese
    article.download()
    article.parse()
    text = unidecode(article.text).replace('Image caption', '')
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(text)
    return text
# Sample page used to exercise the extractor.
# NOTE(review): the URL ends in %20 (a URL-encoded space) - possibly a
# copy/paste artifact; confirm against the intended address.
url='http://www.tyfzw.cn/?sw=774&b=177%20'
# Runs at module level: downloads the page and prints the extracted text.
get_article_newspaper(url)
这似乎是经过最多尝试、维护得最好的库。此外,我也试过 goose 和 boilerpipe,但都不起作用。
后来也想翻译:
import goslate
def language_translate(text):  # translates to language
    """Return ``text`` translated to English when it is not already English.

    The language of ``text`` is detected with goslate; if the detected
    language id is not 'en', the text is translated to English.

    Args:
        text: The string to translate. Empty or ``None`` input is returned
            unchanged without contacting the translation service.

    Returns:
        The English translation, or ``text`` itself when it is empty or
        already detected as English.
    """
    if not text:
        # Nothing to detect or translate; avoid a pointless service call.
        return text
    gs = goslate.Goslate()
    # Detect the language of the actual input. The earlier code passed the
    # literal string 'text', so the result never depended on the argument.
    language_id = gs.detect(text)
    if language_id != 'en':
        text = gs.translate(text, 'en')
    return text