在 Windows 8 上使用 Python 3.3 按照 scikit-learn 教程操作时,需要先获取 20 Newsgroups(新闻组)数据集。但是数据集下载完成后,IPython 抛出了下面的异常。这是 scikit-learn 的一个 bug 吗?
执行的代码:
categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
from sklearn.datasets import fetch_20newsgroups
twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)
错误输出(含完整的回溯信息):
C:\Python33\lib\site-packages\sklearn\datasets\twenty_newsgroups.py:79: DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead
logger.warn("Downloading dataset from %s (14 MB)", URL)
Downloading dataset from http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz (14 MB)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-2-ab16c84fd2ef> in <module>()
1 categories = ['alt.atheism', 'soc.religion.christian', 'comp.graphics', 'sci.med']
2 from sklearn.datasets import fetch_20newsgroups
----> 3 twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True, random_state=42)
C:\Python33\lib\site-packages\sklearn\datasets\twenty_newsgroups.py in fetch_20newsgroups(data_home, subset, categories, shuffle, random_state, download_if_missing)
142 if download_if_missing:
143 cache = download_20newsgroups(target_dir=twenty_home,
--> 144 cache_path=cache_path)
145 else:
146 raise IOError('20Newsgroups dataset not found')
C:\Python33\lib\site-packages\sklearn\datasets\twenty_newsgroups.py in download_20newsgroups(target_dir, cache_path)
86
87 # Store a zipped pickle
---> 88 cache = dict(train=load_files(train_path, charset='latin1'),
89 test=load_files(test_path, charset='latin1'))
90 open(cache_path, 'wb').write(pickle.dumps(cache).encode('zip'))
C:\Python33\lib\site-packages\sklearn\datasets\base.py in load_files(container_path, description, categories, load_content, shuffle, charset, charse_error, random_state)
181 data = [open(filename).read() for filename in filenames]
182 if charset is not None:
--> 183 data = [d.decode(charset, charse_error) for d in data]
184 return Bunch(data=data,
185 filenames=filenames,
C:\Python33\lib\site-packages\sklearn\datasets\base.py in <listcomp>(.0)
181 data = [open(filename).read() for filename in filenames]
182 if charset is not None:
--> 183 data = [d.decode(charset, charse_error) for d in data]
184 return Bunch(data=data,
185 filenames=filenames,
AttributeError: 'str' object has no attribute 'decode'