我承认,我是 Python 新手。
我从 https://sds-platform-private.s3-us-east-2.amazonaws.com/uploads/P14-Part3-Classification.zip 获得了 KNN 算法代码。
我修改了读取数据集的那一行,使其指向我的 CICIDS(IDS 2017)数据集:
dataset = pd.read_csv('Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv')
我使用 jupyter notebook 来运行代码。
在执行代码片段时:
# Plot the K-NN decision regions over the first two feature columns of the
# training set, then overlay the training points coloured by class label.
X_set, y_set = X_train, y_train
# Build a dense 2-D grid spanning [min - 1, max + 1] of feature columns 0 and 1,
# sampled every 0.01 units along each axis.
# NOTE(review): the grid size is the product of the two padded ranges divided
# by 0.01 each; the traceback in this post reports 161771019 grid points
# (2.41 GiB as float64). Presumably the features are unscaled - confirm.
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
# Classify every grid point (flattened to an (n_points, 2) array), reshape the
# predictions back to the grid shape and draw them as filled contours.
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
alpha = 0.75, cmap = ListedColormap(('red', 'green')))
# Clamp the axes to the extent of the grid.
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# One scatter call per distinct label so each class gets its own colour and
# legend entry; i indexes the two-colour map, j is the label value.
for i, j in enumerate(np.unique(y_set)):
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('K-NN (Training set)')
plt.xlabel('Xaxis')
plt.ylabel('Yaxis')
plt.legend()
plt.show()
我遇到了以下错误:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-8-db5b0d75197c> in <module>
3 X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
4 np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
----> 5 plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
6 alpha = 0.75, cmap = ListedColormap(('red', 'green')))
7 plt.xlim(X1.min(), X1.max())
~\anaconda3\lib\site-packages\sklearn\neighbors\_classification.py in predict(self, X)
173 X = check_array(X, accept_sparse='csr')
174
--> 175 neigh_dist, neigh_ind = self.kneighbors(X)
176 classes_ = self.classes_
177 _y = self._y
~\anaconda3\lib\site-packages\sklearn\neighbors\_base.py in kneighbors(self, X, n_neighbors, return_distance)
660 delayed_query = delayed(_tree_query_parallel_helper)
661 parallel_kwargs = {"prefer": "threads"}
--> 662 chunked_results = Parallel(n_jobs, **parallel_kwargs)(
663 delayed_query(
664 self._tree, X[s], n_neighbors, return_distance)
~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050
~\anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
864 return False
865 else:
--> 866 self._dispatch(tasks)
867 return True
868
~\anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
782 with self._lock:
783 job_idx = len(self._jobs)
--> 784 job = self._backend.apply_async(batch, callback=cb)
785 # A job can complete so quickly than its callback is
786 # called before we get here, causing self._jobs to
~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~\anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~\anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~\anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
260 # change the default number of processes to -1
261 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262 return [func(*args, **kwargs)
263 for func, args, kwargs in self.items]
264
~\anaconda3\lib\site-packages\sklearn\neighbors\_base.py in _tree_query_parallel_helper(tree, *args, **kwargs)
490 under PyPy.
491 """
--> 492 return tree.query(*args, **kwargs)
493
494
sklearn\neighbors\_binary_tree.pxi in sklearn.neighbors._kd_tree.BinaryTree.query()
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
70 FutureWarning)
71 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72 return f(**kwargs)
73 return inner_f
74
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
596 array = array.astype(dtype, casting="unsafe", copy=False)
597 else:
--> 598 array = np.asarray(array, order=order, dtype=dtype)
599 except ComplexWarning:
600 raise ValueError("Complex data not supported\n"
~\anaconda3\lib\site-packages\numpy\core\_asarray.py in asarray(a, dtype, order)
81
82 """
---> 83 return array(a, dtype, copy=False, order=order)
84
85
MemoryError: Unable to allocate 2.41 GiB for an array with shape (161771019, 2) and data type float64
可能是哪里出了问题?