我有一个文档列表，并用下面这个类对该列表进行处理。基本上，morphed_documents 是一个字符串列表。最后，算法会返回每个文档所属的簇。但是，为什么得到的结果和模型的标签不一样呢？
class Vectorizer:
    """TF-IDF vectorization plus K-Means clustering for a list of documents.

    Typical flow: ``fit_transform`` -> ``fit`` -> ``print_terms``.
    """

    def __init__(self):
        self.vectorizer = TfidfVectorizer()

    def fit_transform(self, morphed_documents):
        """Learn the vocabulary and return the TF-IDF matrix.

        Args:
            morphed_documents: Iterable of document strings.

        Returns:
            The document-term matrix produced by the underlying vectorizer.
        """
        return self.vectorizer.fit_transform(morphed_documents)

    def fit(self, number_of_clusters, matrix, random_state=None):
        """Cluster the rows of ``matrix`` with K-Means.

        Args:
            number_of_clusters: Number of clusters to form.
            matrix: Feature matrix, e.g. the result of ``fit_transform``.
            random_state: Optional seed forwarded to ``KMeans``. K-Means
                starts from a random initialisation, so without a fixed
                seed the cluster numbering (and possibly the assignment)
                can differ from one call to the next.

        Returns:
            The fitted ``KMeans`` model; ``model.labels_`` holds the cluster
            index assigned to each row of ``matrix``.
        """
        model = KMeans(
            n_clusters=number_of_clusters,
            init='k-means++',
            max_iter=100,
            n_init=100,
            random_state=random_state,
        )
        model.fit(matrix)
        return model

    def _feature_names(self):
        """Return the fitted vocabulary terms across scikit-learn versions."""
        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back only for old installations.
        getter = getattr(self.vectorizer, "get_feature_names_out", None)
        if getter is None:
            getter = self.vectorizer.get_feature_names
        return getter()

    def print_terms(self, model, number_of_clusters, top_n=100):
        """Print the highest-weighted terms of each cluster centroid.

        Args:
            model: Fitted clustering model exposing ``cluster_centers_``.
            number_of_clusters: How many clusters (rows of the centroid
                matrix) to print.
            top_n: Maximum number of terms printed per cluster.
        """
        # Column indices of every centroid, sorted by descending weight.
        order_centroids = model.cluster_centers_.argsort()[:, ::-1]
        terms = self._feature_names()
        for i in range(number_of_clusters):
            print("Cluster %d:" % i)
            for ind in order_centroids[i, :top_n]:
                print(' %s' % terms[ind])