- 我正在按照官方教程操作。
- 我有 256 GB 内存,所以应该没问题。
- 程序在开始运行 HDBSCAN 部分后 1 秒内就崩溃了。
a.) CODE = 官方教程,官方数据集
# Project mnist.data down to a 2-component UMAP embedding that is then used
# as the input to clustering.
clusterable_embedding = umap.UMAP(
n_neighbors=30,
min_dist=0.0,
n_components=2,
# Fixed seed passed to UMAP.
random_state=42,
).fit_transform(mnist.data)
# Scatter-plot the two embedding columns, colouring each point by mnist.target.
plt.scatter(clusterable_embedding[:, 0], clusterable_embedding[:, 1],
c=mnist.target, s=0.1, cmap='Spectral');
# Cluster the embedding with HDBSCAN and keep the per-point labels.
# This is the call that raises TerminatedWorkerError in the traceback that
# follows this snippet.
# NOTE(review): the traceback shows the failure inside a joblib worker that
# was started with n_jobs=core_dist_n_jobs; passing core_dist_n_jobs=1 to
# HDBSCAN may avoid spawning worker processes -- TODO confirm.
labels = hdbscan.HDBSCAN(
min_samples=10,
min_cluster_size=500,
).fit_predict(clusterable_embedding)
a.) ERROR = 官方教程,官方数据集
---------------------------------------------------------------------------
TerminatedWorkerError Traceback (most recent call last)
<ipython-input-44-be8152da6dea> in <module>
2 labels = hdbscan.HDBSCAN(
3 min_samples=10,
----> 4 min_cluster_size=500).fit_predict(clusterable_embedding)
~/anaconda3/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py in fit_predict(self, X, y)
939 cluster labels
940 """
--> 941 self.fit(X)
942 return self.labels_
943
~/anaconda3/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py in fit(self, X, y)
917 self._condensed_tree,
918 self._single_linkage_tree,
--> 919 self._min_spanning_tree) = hdbscan(X, **kwargs)
920
921 if self.prediction_data:
~/anaconda3/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py in hdbscan(X, min_cluster_size, min_samples, alpha, cluster_selection_epsilon, metric, p, leaf_size, algorithm, memory, approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs, cluster_selection_method, allow_single_cluster, match_reference_implementation, **kwargs)
613 approx_min_span_tree,
614 gen_min_span_tree,
--> 615 core_dist_n_jobs, **kwargs)
616 else: # Metric is a valid BallTree metric
617 # TO DO: Need heuristic to decide when to go to boruvka;
~/anaconda3/envs/rapids/lib/python3.7/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
350
351 def __call__(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
353
354 def call_and_shelve(self, *args, **kwargs):
~/anaconda3/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py in _hdbscan_boruvka_kdtree(X, min_samples, alpha, metric, p, leaf_size, approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs, **kwargs)
276 leaf_size=leaf_size // 3,
277 approx_min_span_tree=approx_min_span_tree,
--> 278 n_jobs=core_dist_n_jobs, **kwargs)
279 min_spanning_tree = alg.spanning_tree()
280 # Sort edges of the min_spanning_tree by weight
hdbscan/_hdbscan_boruvka.pyx in hdbscan._hdbscan_boruvka.KDTreeBoruvkaAlgorithm.__init__()
hdbscan/_hdbscan_boruvka.pyx in hdbscan._hdbscan_boruvka.KDTreeBoruvkaAlgorithm._compute_bounds()
~/anaconda3/envs/rapids/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
1059
1060 with self._backend.retrieval_context():
-> 1061 self.retrieve()
1062 # Make sure that we get a last message telling us we are done
1063 elapsed_time = time.time() - self._start_time
~/anaconda3/envs/rapids/lib/python3.7/site-packages/joblib/parallel.py in retrieve(self)
938 try:
939 if getattr(self._backend, 'supports_timeout', False):
--> 940 self._output.extend(job.get(timeout=self.timeout))
941 else:
942 self._output.extend(job.get())
~/anaconda3/envs/rapids/lib/python3.7/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
~/anaconda3/envs/rapids/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
~/anaconda3/envs/rapids/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.
The exit codes of the workers are {EXIT(1)}
b.) HDBSCAN官方文档的代码
# Fit HDBSCAN (min_cluster_size=15) on clusterable_embedding. The traceback
# that follows this snippet shows this line raising TerminatedWorkerError.
clusterer = hdbscan.HDBSCAN(min_cluster_size=15).fit(clusterable_embedding)
# Request 12 colours from the 'Paired' palette.
color_palette = sns.color_palette('Paired', 12)
# Map each label to a palette colour; negative labels get a mid-grey instead.
# NOTE(review): a label of 12 or more would index past the 12 requested
# colours -- confirm the expected number of clusters.
cluster_colors = [color_palette[x] if x >= 0
else (0.5, 0.5, 0.5)
for x in clusterer.labels_]
# Desaturate each point's colour using its value in clusterer.probabilities_.
cluster_member_colors = [sns.desaturate(x, p) for x, p in
zip(cluster_colors, clusterer.probabilities_)]
# NOTE(review): `projection` is not defined anywhere in this snippet;
# presumably it holds the 2-D coordinates to plot -- confirm it corresponds
# to clusterable_embedding.
plt.scatter(*projection.T, s=50, linewidth=0, c=cluster_member_colors, alpha=0.25)
b.) HDBSCAN 错误
---------------------------------------------------------------------------
TerminatedWorkerError Traceback (most recent call last)
<ipython-input-64-5de5656b5eb1> in <module>
----> 1 clusterer = hdbscan.HDBSCAN(min_cluster_size=15).fit(clusterable_embedding)
2 color_palette = sns.color_palette('Paired', 12)
3 cluster_colors = [color_palette[x] if x >= 0
4 else (0.5, 0.5, 0.5)
5 for x in clusterer.labels_]
~/anaconda3/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py in fit(self, X, y)
917 self._condensed_tree,
918 self._single_linkage_tree,
--> 919 self._min_spanning_tree) = hdbscan(X, **kwargs)
920
921 if self.prediction_data:
~/anaconda3/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py in hdbscan(X, min_cluster_size, min_samples, alpha, cluster_selection_epsilon, metric, p, leaf_size, algorithm, memory, approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs, cluster_selection_method, allow_single_cluster, match_reference_implementation, **kwargs)
613 approx_min_span_tree,
614 gen_min_span_tree,
--> 615 core_dist_n_jobs, **kwargs)
616 else: # Metric is a valid BallTree metric
617 # TO DO: Need heuristic to decide when to go to boruvka;
~/anaconda3/envs/rapids/lib/python3.7/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
350
351 def __call__(self, *args, **kwargs):
--> 352 return self.func(*args, **kwargs)
353
354 def call_and_shelve(self, *args, **kwargs):
~/anaconda3/envs/rapids/lib/python3.7/site-packages/hdbscan/hdbscan_.py in _hdbscan_boruvka_kdtree(X, min_samples, alpha, metric, p, leaf_size, approx_min_span_tree, gen_min_span_tree, core_dist_n_jobs, **kwargs)
276 leaf_size=leaf_size // 3,
277 approx_min_span_tree=approx_min_span_tree,
--> 278 n_jobs=core_dist_n_jobs, **kwargs)
279 min_spanning_tree = alg.spanning_tree()
280 # Sort edges of the min_spanning_tree by weight
hdbscan/_hdbscan_boruvka.pyx in hdbscan._hdbscan_boruvka.KDTreeBoruvkaAlgorithm.__init__()
hdbscan/_hdbscan_boruvka.pyx in hdbscan._hdbscan_boruvka.KDTreeBoruvkaAlgorithm._compute_bounds()
~/anaconda3/envs/rapids/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
1059
1060 with self._backend.retrieval_context():
-> 1061 self.retrieve()
1062 # Make sure that we get a last message telling us we are done
1063 elapsed_time = time.time() - self._start_time
~/anaconda3/envs/rapids/lib/python3.7/site-packages/joblib/parallel.py in retrieve(self)
938 try:
939 if getattr(self._backend, 'supports_timeout', False):
--> 940 self._output.extend(job.get(timeout=self.timeout))
941 else:
942 self._output.extend(job.get())
~/anaconda3/envs/rapids/lib/python3.7/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
540 AsyncResults.get from multiprocessing."""
541 try:
--> 542 return future.result(timeout=timeout)
543 except CfTimeoutError as e:
544 raise TimeoutError from e
~/anaconda3/envs/rapids/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
433 raise CancelledError()
434 elif self._state == FINISHED:
--> 435 return self.__get_result()
436 else:
437 raise TimeoutError()
~/anaconda3/envs/rapids/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
382 def __get_result(self):
383 if self._exception:
--> 384 raise self._exception
385 else:
386 return self._result
TerminatedWorkerError: A worker process managed by the executor was unexpectedly terminated. This could be caused by a segmentation fault while calling the function or by an excessive memory usage causing the Operating System to kill the worker.
The exit codes of the workers are {EXIT(1)}
类似问题
- 他的代码与我的非常相似并且可以正常运行,而我的却报错 - 使用 UMAP 和 HDBScan 进行聚类