我有这个简单的代码:
#usr/bin/python
from bs4 import BeautifulSoup
import requests
import tldextract
def scrape(url):
main_domain = tldextract.extract(url)
r = requests.get(url)
data = r.text
soup = BeautifulSoup(data)
list = []
for href in soup.find_all('a'):
link_domain = tldextract.extract(href.get('href'))
print link_domain
print
获取错误为:
Traceback (most recent call last):
File "cloud.py", line 20, in <module>
scrape("--- url here -- ")
File "cloud.py", line 14, in scrape
link_domain = tldextract.extract(href.get('href'))
File "/usr/lib/python2.6/site-packages/tldextract/tldextract.py", line 196, in extract
return TLD_EXTRACTOR(url)
File "/usr/lib/python2.6/site-packages/tldextract/tldextract.py", line 127, in __call__
netloc = SCHEME_RE.sub("", url) \
TypeError: expected string or buffer
我该如何解决。