// Scalar types: raw term occurrence counts and (float) term weights.
using Count = unsigned long;
using Weight = float;

// Per-document vectors: term => count, and term => weight.
using StringToCountMap = std::map<std::string, Count>;
using StringToWeightMap = std::map<std::string, Weight>;

// Collections of per-document vectors, keyed by an unsigned long id.
using UnsignedToStringToCountMap = std::map<unsigned long, StringToCountMap>;
using UnsignedToStringToWeightMap = std::map<unsigned long, StringToWeightMap>;

// Cluster assignment: unsigned long id => index of the cluster it belongs to.
using ClustersMap = std::map<unsigned long, std::size_t>;
// Clustering algorithm over a set of term-weight document vectors.
//
// The object does not copy its input: documentVectors_ is a reference to the
// map handed to the constructor, so that map must stay alive (and should not
// be destroyed or replaced) for as long as this object is used.
class DefaultClusteringAlgorithm
{
public:
// minimum number of documents changing clusters for algorithm to end
static const unsigned DocumentChangeThreshold = 0;
// Stores the requested number of clusters and binds documentVectors_ to the
// caller's map. No clustering work happens here; centroids_ and clusters_
// start out empty.
DefaultClusteringAlgorithm(unsigned numClusters, const UnsignedToStringToWeightMap &documentVectors)
: numClusters_(numClusters)
, documentVectors_(documentVectors)
{
}
~DefaultClusteringAlgorithm() {}
// Entry point. Returns a reference to a ClustersMap — presumably clusters_,
// which would make the result valid only while this object lives; the
// definition is not shown here, so confirm.
const ClustersMap &DoClustering();
private:
// NOTE(review): the three steps below look like the usual seed / assign /
// update phases of an iterative clustering loop driven by DoClustering();
// only RecalculateCentroids() is defined in the visible part of the file.
void ChooseInitialCentroids();
// Presumably returns how many documents changed cluster, for comparison
// against DocumentChangeThreshold — confirm against the definition.
unsigned ClusterOnCentroids();
// Meant to derive new centroids from the current assignments in clusters_;
// defined out of line.
void RecalculateCentroids();
// Vector helpers over term => weight maps; definitions not shown here.
// Presumably the dot product of two vectors and the Euclidean length of
// one — confirm.
float DocumentDotProduct(const StringToWeightMap &left, const StringToWeightMap &right);
float DocumentLength(const StringToWeightMap &document);
// number of clusters requested by the caller
unsigned numClusters_;
// stores cluster_id => centroid
std::vector<StringToWeightMap> centroids_;
// maps question id => cluster id
ClustersMap clusters_;
// document vectors, keyed by the same ids used as keys in clusters_
// (non-owning reference to the caller's map)
const UnsignedToStringToWeightMap &documentVectors_;
};
void DefaultClusteringAlgorithm::RecalculateCentroids()
{
std::vector<unsigned> newCentroidsSizes(centroids_.size());
std::vector<StringToWeightMap> newCentroids(centroids_.size());
ClustersMap::const_iterator clusterMapping = clusters_.begin();
for (; clusterMapping != clusters_.end(); ++clusterMapping)
{
std::size_t clusterId = clusterMapping->second;
++newCentroidsSizes[clusterId];
const StringToWeightMap &document = documentVectors_.at(clusterMapping->first);
StringToWeightMap::const_iterator termWeight = document.cbegin();
for (; termWeight != document.end(); ++termWeight);
{
newCentroids[clusterId][termWeight->first] += termWeight->second;
}
}
std::vector<unsigned>::iterator centroidSize = newCentroidsSizes.begin();
for (; centroidSize != newCentroidsSizes.end(); ++centroidSize)
{
std::size_t clusterId = centroidSize - newCentroidsSizes.begin();
StringToWeightMap::iterator centroidTermWeight = newCentroids[clusterId].begin();
for (; centroidTermWeight != newCentroids[clusterId].end(); ++centroidTermWeight)
{
centroidTermWeight->second /= *centroidSize;
}
}
}
在创建 const_iterator termWeight 时出现问题:
StringToWeightMap::const_iterator termWeight = document.begin();
正如您在上图中所见,termWeight 这个 const_iterator 包含无效数据。但是,它所遍历的 const std::map(变量 document)本身是完全有效的 std::map。我想不出为什么会发生这种情况。
我最近了解到 std::map::cbegin() 存在。我应该改用那种方法吗?
编辑:包括更多上下文