1
// Scalar types used for raw occurrence counts and for weights.
using Count = unsigned long;
using Weight = float;

// String-keyed maps holding a count or a weight per key.
using StringToCountMap = std::map<std::string, Count>;
using StringToWeightMap = std::map<std::string, Weight>;

// The string-keyed maps above, indexed by an unsigned long id.
using UnsignedToStringToCountMap = std::map<unsigned long, StringToCountMap>;
using UnsignedToStringToWeightMap = std::map<unsigned long, StringToWeightMap>;

// Unsigned long id => std::size_t index (used below as a cluster id).
using ClustersMap = std::map<unsigned long, std::size_t>;


// Assigns documents (sparse string => weight vectors) to a fixed number of
// clusters, each represented by a centroid vector.
class DefaultClusteringAlgorithm
{
public:
    // Minimum number of documents changing clusters for the algorithm to end.
    // NOTE(review): the exact stop condition lives in DoClustering, which is
    // not shown next to this declaration -- confirm how it is compared.
    static const unsigned DocumentChangeThreshold = 0;

    // Remembers the requested cluster count and binds documentVectors by
    // reference. No copy is taken, so the caller's map has to stay alive for
    // as long as this object is used.
    DefaultClusteringAlgorithm(unsigned numClusters, const UnsignedToStringToWeightMap &documentVectors)
        : numClusters_(numClusters)
        , documentVectors_(documentVectors)
    {
    }

    ~DefaultClusteringAlgorithm() {}

    // Entry point of the algorithm; hands back a read-only id => cluster map.
    const ClustersMap &DoClustering();

private:
    // Individual steps of the algorithm.
    void ChooseInitialCentroids();
    unsigned ClusterOnCentroids();
    void RecalculateCentroids();

    // Vector helpers operating on sparse string => weight documents.
    float DocumentDotProduct(const StringToWeightMap &left, const StringToWeightMap &right);
    float DocumentLength(const StringToWeightMap &document);

    // Number of clusters requested at construction.
    unsigned numClusters_;

    // Centroid vectors, indexed by cluster id.
    std::vector<StringToWeightMap> centroids_;

    // Question id => id of the cluster it is currently assigned to.
    ClustersMap clusters_;

    // Document vectors keyed by id; referenced, not owned.
    const UnsignedToStringToWeightMap &documentVectors_;
};

void DefaultClusteringAlgorithm::RecalculateCentroids()
{
    std::vector<unsigned> newCentroidsSizes(centroids_.size());
    std::vector<StringToWeightMap> newCentroids(centroids_.size());

    ClustersMap::const_iterator clusterMapping = clusters_.begin();

    for (; clusterMapping != clusters_.end(); ++clusterMapping)
    {
        std::size_t clusterId = clusterMapping->second;

        ++newCentroidsSizes[clusterId];
        const StringToWeightMap &document = documentVectors_.at(clusterMapping->first);

        StringToWeightMap::const_iterator termWeight = document.cbegin();

        for (; termWeight != document.end(); ++termWeight);
        {
            newCentroids[clusterId][termWeight->first] += termWeight->second;
        }
    }

    std::vector<unsigned>::iterator centroidSize = newCentroidsSizes.begin();

    for (; centroidSize != newCentroidsSizes.end(); ++centroidSize)
    {
        std::size_t clusterId = centroidSize - newCentroidsSizes.begin();

        StringToWeightMap::iterator centroidTermWeight = newCentroids[clusterId].begin();

        for (; centroidTermWeight != newCentroids[clusterId].end(); ++centroidTermWeight)
        {
            centroidTermWeight->second /= *centroidSize;
        }
    }
}

调试器监视

在创建 const_iterator termWeight 时出现问题:

StringToWeightMap::const_iterator termWeight = document.begin();

正如您在上图中所见,const_iterator termWeight 包含无效数据。但是,const std::map 类型的变量 document 本身是完全有效的 std::map。我想不出为什么会发生这种情况。

我最近了解到 std::map::cbegin() 存在。我应该改用那种方法吗?

编辑:包括更多上下文

4

2 回答 2

2

哈!我发现了错误!我的 for 循环末尾多了一个愚蠢的小分号!这个分号让循环体变成了空语句,于是后面的代码块只会在循环结束后(此时 termWeight 已等于 end())执行一次,所以迭代器看起来是无效的。

于 2013-11-29T05:03:49.047 回答
0

std::map 的 begin() 方法可能会返回一个指向容器末尾(即等于 end())的迭代器,因为该 map 中可能根本没有任何元素。

于 2013-11-29T04:45:19.667 回答