我是 Agglomerative Clustering 和 doc2vec 的新手,希望有人能帮我解决以下问题。
这是我的代码:
# Build an agglomerative clustering estimator: average linkage, no
# connectivity constraint, two clusters requested.
model = AgglomerativeClustering(linkage='average',
connectivity=None, n_clusters=2)
# Input matrix for clustering: the doctag_syn0 array taken from model_dm
# (presumably a trained doc2vec model with one row per document -- TODO confirm).
X = model_dm.docvecs.doctag_syn0
# This is the statement the traceback below points at. The call chain shown
# there ends in scipy's pdist, which allocates m * (m - 1) // 2 doubles,
# where m is the first dimension of the input; that allocation raises
# MemoryError.
model.fit(X, y=None)
# NOTE(review): fit_predict presumably fits the model again before returning
# labels, which would make the preceding fit() call redundant -- confirm
# against the scikit-learn documentation.
model.fit_predict(X, y=None)
我想要的是预测每个观测值的距离的平均值,但我收到了以下错误:
MemoryError                               Traceback (most recent call last)
<ipython-input-22-d8b93bc6abe1> in <module>()
2 model = AgglomerativeClustering(linkage='average',connectivity=None,n_clusters=2)
3 X = model_dm.docvecs.doctag_syn0
----> 4 model.fit(X, y=None)
5
/usr/local/lib64/python2.7/site-packages/sklearn/cluster/hierarchical.pyc in fit(self, X, y)
763 n_components=self.n_components,
764 n_clusters=n_clusters,
--> 765 **kwargs)
766 # Cut the tree
767 if compute_full_tree:
/usr/local/lib64/python2.7/site-packages/sklearn/externals/joblib/memory.pyc in __call__(self, *args, **kwargs)
281
282 def __call__(self, *args, **kwargs):
--> 283 return self.func(*args, **kwargs)
284
285 def call_and_shelve(self, *args, **kwargs):
/usr/local/lib64/python2.7/site-packages/sklearn/cluster/hierarchical.pyc in _average_linkage(*args, **kwargs)
547 def _average_linkage(*args, **kwargs):
548 kwargs['linkage'] = 'average'
--> 549 return linkage_tree(*args, **kwargs)
550
551
/usr/local/lib64/python2.7/site-packages/sklearn/cluster/hierarchical.pyc in linkage_tree(X, connectivity, n_components, n_clusters, linkage, affinity, return_distance)
428 i, j = np.triu_indices(X.shape[0], k=1)
429 X = X[i, j]
--> 430 out = hierarchy.linkage(X, method=linkage, metric=affinity)
431 children_ = out[:, :2].astype(np.int)
432
/usr/local/lib64/python2.7/site-packages/scipy/cluster/hierarchy.pyc in linkage(y, method, metric)
669 'matrix looks suspiciously like an uncondensed '
670 'distance matrix')
--> 671 y = distance.pdist(y, metric)
672 else:
673 raise ValueError("`y` must be 1 or 2 dimensional.")
/usr/local/lib64/python2.7/site-packages/scipy/spatial/distance.pyc in pdist(X, metric, p, w, V, VI)
1375
1376 m, n = s
-> 1377 dm = np.zeros((m * (m - 1)) // 2, dtype=np.double)
1378
1379 # validate input for multi-args metrics
MemoryError: