在我的项目中,我看到了两个问题:
- 大约每 10 次请求中有 8 次会收到 `HTTP Error 503: Service Temporarily Unavailable`。
- 我的遍历(循环)写法是否正确?
我需要查找带有 `class="torType"` 的元素,然后只提取其中的链接 URL。目前代码可以运行,但我的做法对吗?
#import urllib.request
#from bs4 import BeautifulSoup
# Address of the listing page that the script downloads and scans for links.
WebUrl = 'http://kickasstorrents.come.in/tv/'
def RetrieveWebData(MyUrl, timeout=30, retries=3, backoff=1.0):
    """Download the resource at *MyUrl* and return its raw bytes.

    The request is sent with a browser-like ``User-agent`` header. An
    HTTP 503 response is treated as transient and retried with an
    exponentially growing pause between attempts; any other failure is
    reported immediately.

    Args:
        MyUrl: URL to fetch.
        timeout: Seconds to wait on the connection before giving up.
        retries: Number of additional attempts made after an HTTP 503.
        backoff: Base pause in seconds; doubled after every 503.

    Returns:
        The response body as ``bytes``, or ``None`` when the download
        failed (the error is printed in that case).

    Raises:
        ValueError: If *MyUrl* is not a well-formed URL; this is not an
            ``OSError`` and is deliberately left to the caller.
    """
    import time
    import urllib.error
    import urllib.request

    opener = urllib.request.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]

    attempts = max(retries, 0) + 1
    for attempt in range(attempts):
        try:
            # The context manager closes the connection even if read() fails.
            with opener.open(MyUrl, timeout=timeout) as response:
                return response.read()
        except urllib.error.HTTPError as e:
            # Only 503 signals a temporary condition worth retrying.
            if e.code == 503 and attempt < attempts - 1:
                time.sleep(backoff * (2 ** attempt))
                continue
            print("An error occurred reading the webpage ", e)
            return None
        except OSError as e:
            # URLError, socket timeouts and connection errors all land here.
            print("An error occurred reading the webpage ", e)
            return None
    return None
def FilterWebData(RawData):
    """Extract the link targets of every ``<a class="torType">`` element.

    Args:
        RawData: HTML markup as ``bytes`` or ``str``. ``None`` or empty
            input is accepted and yields an empty result.

    Returns:
        A list with the ``href`` value of each matching anchor, in
        document order. Anchors without an ``href`` attribute are skipped.
    """
    if not RawData:
        # Nothing was downloaded, so there is nothing to parse.
        return []

    from bs4 import BeautifulSoup

    # Naming the parser keeps results identical across machines and avoids
    # the "no parser was explicitly specified" warning.
    soup = BeautifulSoup(RawData, "html.parser")

    # find_all already returns the matching tags; they can be read directly
    # without converting them back to text and parsing a second time.
    # Parsing in-memory markup performs no I/O, so no OSError handling is
    # needed here.
    anchors = soup.find_all("a", {"class": "torType"})
    return [anchor["href"] for anchor in anchors if anchor.has_attr("href")]
# Download the listing page. On failure RetrieveWebData prints the error
# itself and hands back None.
RawPage = RetrieveWebData(WebUrl)
if RawPage is None:
    # Nothing to parse; passing None to the HTML parser would raise.
    RecentTorrentLinks = None
else:
    RecentTorrentLinks = FilterWebData(RawPage)
    print(RecentTorrentLinks)