c++ - 了解缓存友好、面向数据的对象和句柄

Question

using namespace std;

考虑一种传统的OOP 方法来进行实体/对象管理：

struct Entity { bool alive{true}; }

struct Manager {        
    vector<unique_ptr<Entity>> entities; // Non cache-friendly

    void update() {
        // erase-remove_if idiom: remove all !alive entities
        entities.erase(remove_if(begin(entities), end(entities),
            [](const unique_ptr<Entity>& e){ return !e->alive; }));
    }
};

struct UserObject {
    // Even if Manager::entities contents are re-ordered
    // this reference is still valid (if the entity was not deleted)
    Entity& entity;
};

但是，我想尝试一种面向数据的方法：不是动态分配Entity实例，而是将它们存储在缓存友好的线性内存中。

struct Manager {
    vector<Entity> entities; // Cache-friendly
    void update() { /* erase-remove_if !alive entities */ }
};

struct UserObject {
    // This reference may unexpectedly become invalid
    Entity& entity;
};

看起来很好。但是……如果std::vector需要重新分配其内部数组，所有对实体的引用都将变得无效。

解决方案是使用句柄类。

struct Entity { bool alive{true}; };
struct EntityHandle { int index; };

struct Manager {
    vector<Entity> entities; // Cache-friendly      
    void update() { /* erase-remove_if !alive entities */ }
    Entity& getEntity(EntityHandle h) { return entities[h.index]; }
};

struct UserObject { EntityHandle entity; };

如果我只是在向量后面添加/删除实体，它似乎可以工作。我可以使用getEntity方法来检索我想要的实体。

但是如果我Entity从向量的中间移除一个呢？所有EntityHandle实例现在都将持有不正确的索引，因为一切都被转移了。例子：

句柄指向索引：2

实体 A 在 update() 期间被删除

现在句柄指向错误的实体。

图 2

这个问题一般是怎么处理的？

句柄索引是否更新？

死实体是否被占位符替换？

澄清：

这和这就是我所说的缓存友好设计的例子。

此外，诸如Artemis之类的组件系统声称采用线性缓存友好设计，并且它们使用类似于句柄的解决方案。他们如何处理我在这个问题中描述的问题？

score 7 · Accepted Answer

insomniac 做了一个很棒的幻灯片，他们的解决方案是这样的

template<typename T, size_t SIZE>
class ResourceManager
{
    T data[SIZE];
    int indices[SIZE];
    size_t back;

    ResourceManager() : back(0)
    {
        for(size_t i=0; i<SIZE; i++)
            indices[i] = static_cast<int>(i);
    }

    int Reserve()
    { return indices[back++]; }

    void Release(int handle)
    {
        for(size_t i=0; i<back; i++)
        {
            if(indices[i] == handle)
            {
                back--;
                std::swap(indices[i], indices[back]);
                return;
            }
        }
    }

    T GetData(size_t handle)
    { return data[handle]; }
};

我希望这个例子能清楚地说明这个想法。

score 6 · Accepted Answer

如果您需要稳定的索引或指针，那么您的数据结构要求开始类似于内存分配器。内存分配器也是一种特殊类型的数据结构，但面临这样的要求，即它们不能随机分配内存或重新分配内存，因为这会使客户端存储的指针无效。所以我建议从经典的空闲列表开始查看内存分配器的实现。

免费清单

这是我写的一个简单的 C 实现来向同事说明这个想法（不打扰线程同步）：

typedef struct FreeList FreeList;

struct FreeList
{
    /// Stores a pointer to the first block in the free list.
    struct FlBlock* first_block;

    /// Stores a pointer to the first free chunk.
    struct FlNode* first_node;

    /// Stores the size of a chunk.
    int type_size;

    /// Stores the number of elements in a block.
    int block_num;
};

/// @return A free list allocator using the specified type and block size, 
/// both specified in bytes.
FreeList fl_create(int type_size, int block_size);

/// Destroys the free list allocator.
void fl_destroy(FreeList* fl);

/// @return A pointer to a newly allocated chunk.
void* fl_malloc(FreeList* fl);

/// Frees the specified chunk.
void fl_free(FreeList* fl, void* mem);

// Implementation:   
typedef struct FlNode FlNode;
typedef struct FlBlock FlBlock;
typedef long long FlAlignType;

struct FlNode
{
    // Stores a pointer to the next free chunk.
    FlNode* next;
};

struct FlBlock
{
    // Stores a pointer to the next block in the list.
    FlBlock* next;

    // Stores the memory for each chunk (variable-length struct).
    FlAlignType mem[1];
};

static void* mem_offset(void* ptr, int n)
{
    // Returns the memory address of the pointer offset by 'n' bytes.
    char* mem = ptr;
    return mem + n;
}

FreeList fl_create(int type_size, int block_size)
{
    // Initialize the free list.
    FreeList fl;
    fl.type_size = type_size >= sizeof(FlNode) ? type_size: sizeof(FlNode);
    fl.block_num = block_size / type_size;
    fl.first_node = 0;
    fl.first_block = 0;
    if (fl.block_num == 0)
        fl.block_num = 1;
    return fl;
}

void fl_destroy(FreeList* fl)
{
    // Free each block in the list, popping a block until the stack is empty.
    while (fl->first_block)
    {
        FlBlock* block = fl->first_block;
        fl->first_block = block->next;
        free(block);
    }
    fl->first_node = 0;
}

void* fl_malloc(FreeList* fl)
{
    // Common case: just pop free element and return.
    FlNode* node = fl->first_node;
    if (node)
    {
        void* mem = node;
        fl->first_node = node->next;
        return mem;
    }
    else
    {
        // Rare case when we're out of free elements.
        // Try to allocate a new block.
        const int block_header_size = sizeof(FlBlock) - sizeof(FlAlignType);
        const int block_size = block_header_size + fl->type_size*fl->block_num;
        FlBlock* new_block = malloc(block_size);

        if (new_block)
        {
            // If the allocation succeeded, initialize the block.
            int j = 0;
            new_block->next = fl->first_block;
            fl->first_block = new_block;

            // Push all but the first chunk in the block to the free list.
            for (j=1; j < fl->block_num; ++j)
            {
                FlNode* node = mem_offset(new_block->mem, j * fl->type_size);
                node->next = fl->first_node;
                fl->first_node = node;
            }

            // Return a pointer to the first chunk in the block.
            return new_block->mem;
        }

        // If we failed to allocate the new block, return null to indicate failure.
        return 0;
    }
}

void fl_free(FreeList* fl, void* mem)
{
    // Just push a free element to the stack.
    FlNode* node = mem;
    node->next = fl->first_node;
    fl->first_node = node;
}

随机存取序列，嵌套自由列表

了解免费列表的想法后，一种可能的解决方案是：

这种类型的数据结构将为您提供不会失效的稳定指针，而不仅仅是索引。但是，如果您想为其使用迭代器，它会增加随机访问和顺序访问的成本。vector它可以像使用方法一样进行顺序访问for_each。

这个想法是使用上面的空闲列表的概念，除了每个块存储自己的空闲列表，并且聚合块的外部数据结构存储块的空闲列表。一个块只有在完全填满时才会从空闲堆栈中弹出。

并行占用位

另一种是使用并行的位阵列来指示阵列的哪些部分被占用/空置。这样做的好处是，您可以在顺序迭代期间检查是否一次占用了许多索引（一次 64 位，此时您可以访问循环中的所有 64 个连续元素，而无需单独检查它们是否占据）。当不是所有 64 个索引都被占用时，您可以使用FFS指令快速确定设置了哪些位。

您可以将其与空闲列表结合使用，然后使用这些位来快速确定迭代期间占用的索引，同时进行快速的常数时间插入和删除。

实际上，您可以获得比std::vector旁边的索引/指针列表更快的顺序访问，因为我们可以再次检查 64 位以查看要在数据结构中遍历哪些元素，并且因为访问模式将始终是顺序的（类似于在数组中使用排序的索引列表）。

所有这些概念都围绕着在数组中留下空白空间以在后续插入时回收，如果您不希望索引或指针对尚未从容器中删除的元素无效，这将成为实际要求。

单链索引列表

另一种解决方案是使用单链表，大多数人可能认为它涉及每个节点的单独堆分配和遍历时的大量缓存未命中，但不一定是这种情况。我们可以将节点连续存储在一个数组中并将它们链接在一起。如果您不将链表视为容器，而是将现有元素链接在一起存储在另一个容器（如数组）中以允许不同的遍历和搜索模式，那么实际上会打开一个优化机会的世界。示例将所有内容存储在一个连续数组中，并使用索引将它们链接在一起：

使用这样存储的数据：

struct Bucket
{
    struct Node
    {
         // Stores the element data.
         T some_data;

         // Points to either the next node in the bucket
         // or the next free node available if this node
         // has been removed.
         int next;
    };
    vector<Node> data;

    // Points to first node in the bucket.
    int head;

    // Points to first free node in the bucket.
    int free_head;
};

这不允许随机访问，如果您从中间移除并经常插入，它的空间局部性确实会降低。但是使用后处理副本恢复它很容易。如果您只需要顺序访问并且想要恒定时间的删除和插入，它可能是合适的。如果您需要稳定的指针而不仅仅是索引，那么您可以将上述结构与嵌套的空闲列表一起使用。

当您有很多非常动态的小列表（不断删除和插入）时，索引 SLL 往往会做得很好。另一个例子，粒子连续存储，但 32 位索引链接仅用于将它们划分为网格以进行快速碰撞检测，同时允许粒子移动每一帧，并且只需更改几个整数即可将粒子从一个网格单元到另一个：

在这种情况下，您可以将 1000x1000 的网格存储在 4 兆字节以下 - 绝对比存储一百万个std::listor实例std::vector并且在粒子四处移动时必须不断地从中删除和插入它们要好。

入住指数

如果您只需要稳定的索引，另一个简单的解决方案就是使用std::vector一个std::stack<int>自由索引来回收/覆盖插入。这遵循恒定时间删除的空闲列表原则，但效率稍低，因为它需要内存来存储空闲索引堆栈。免费列表使堆栈免费。

但是，除非您手动滚动它并避免仅使用，否则您std::vector<T>不能非常有效地使其触发您在删除时存储的元素类型的析构函数（我一直没有跟上 C++，更多的是 C 程序员这些天来，但可能有一种方法可以很好地做到这一点，它仍然尊重您的元素析构函数，而无需手动滚动您自己的等价物std::vector——也许 C++ 专家可以参与）。如果您的类型是普通的 POD 类型，那也可以。

template <class T>
class ArrayWithHoles
{
private:
    std::vector<T> elements;
    std::stack<size_t> free_stack;

public:
    ...

    size_t insert(const T& element)
    {
        if (free_stack.empty())
        {
            elements.push_back(element);
            return elements.size() - 1;
        }
        else
        {
            const size_t index = free_stack.top();
            free_stack.pop();
            elements[index] = element;
            return index;
        }
    }

    void erase(size_t n)
    {
        free_stack.push(n);
    }
};

有这种效果。这让我们陷入了两难境地，因为我们无法判断哪些元素已从容器中删除以在迭代期间跳过。在这里，您可以再次使用并行位数组，也可以只在旁边存储有效索引列表。

如果你这样做，有效索引列表可能会在内存访问模式方面降级，因为它们会随着时间的推移变得未排序。一种快速修复方法是不时对索引进行基数排序，此时您已经恢复了顺序访问模式。

score 1 · Accepted Answer

我将专注于你的向量需要可变大小的情况，例如数据经常被插入并且有时被清理。在这种情况下，在向量中使用虚拟数据或空洞几乎与使用堆数据（如您的第一个解决方案）一样“糟糕”。

如果您经常直接遍历所有数据，并且只使用少数随机“UsersObject”访问，那么下面可能是一个解决方案。就像其他人和您自己提出的那样，它使用了需要在每个删除/更新步骤中更新的间接级别。这需要线性时间，并且绝对不是缓存最佳的。此外，更糟糕的是，这种解决方案不能在没有锁的情况下实现线程安全。

#include <vector>
#include <map>
#include <algorithm>
#include <iostream>
#include <mutex>

using namespace std;

typedef __int64 EntityId;

template<class Entity>
struct Manager {        
    vector<Entity>          m_entities; // Cache-friendly
    map<EntityId, size_t>   m_id_to_idx;
    mutex                   g_pages_mutex;
public:
    Manager() :
        m_entities(),
        m_id_to_idx(),
        m_remove_counter(0),
        g_pages_mutex()
    {}
    void update()
    {
        g_pages_mutex.lock();
        m_remove_counter = 0;
        // erase-remove_if idiom: remove all !alive entities

        for (vector<Entity>::iterator i = m_entities.begin(); i <  m_entities.end(); )
        {
            Entity &e = (*i);
            if (!e.m_alive)
            { 
                m_id_to_idx.erase(m_id_to_idx.find(e.m_id)); 
                i = m_entities.erase(i);
                m_remove_counter++;
                return true;
            } 
            else
            {
                m_id_to_idx[e.m_id] -= m_remove_counter;
                i++;
            }                    
        }
        g_pages_mutex.unlock();
    }
    Entity& getEntity(EntityId h)
    { 
        g_pages_mutex.lock();
        map<EntityId, size_t>::const_iterator it = m_id_to_idx.find(h);


        if (it != m_id_to_idx.end())
        {
            Entity& et =  m_entities[(*it).second];
            g_pages_mutex.unlock();
            return et;
        }
        else
        {
            g_pages_mutex.unlock();
            throw std::exception();
        }
    }
    EntityId inserEntity(const Entity& entity) 
    {
        g_pages_mutex.lock();
        size_t idx = m_entities.size();
        m_id_to_idx[entity.m_id]  = idx;
        m_entities.push_back(entity);
        g_pages_mutex.unlock();
        return entity.m_id;
    }
};

class Entity { 
    static EntityId  s_uniqeu_entity_id;
public:
    Entity (bool alive) :  m_id (s_uniqeu_entity_id++), m_alive(alive) {}
    Entity () :  m_id (s_uniqeu_entity_id++), m_alive(true) {}
    Entity (const Entity &in) : m_id(in.m_id), m_alive(in.m_alive) {}
    EntityId  m_id;
    bool m_alive; 
};

EntityId  Entity::s_uniqeu_entity_id = 0;

struct UserObject
{ 
    UserObject(bool alive, Manager<Entity>& manager) : 
        entity(manager.inserEntity(alive)) 
    {}
    EntityId entity; 
};

int main(int argc, char* argv[])
{
    Manager<Entity> manager;
    UserObject obj1(true, manager);
    UserObject obj2(false, manager);
    UserObject obj3(true, manager);
    cout << obj1.entity << "," << obj2.entity << "," << obj3.entity;
    manager.update();
    manager.getEntity(obj1.entity);
    manager.getEntity(obj3.entity);
    try
    {
        manager.getEntity(obj2.entity);
        return -1;
    }
    catch (std::exception ex)
    {
        // obj 2 should be invalid
    }
    return 0;
}

我不确定，如果您指定了足够的附加条件，为什么要解决具有这两个相互矛盾的假设的问题：拥有一个快速迭代的列表并对该列表的元素具有稳定的引用。在我看来，这听起来像是两个用例，它们也应该在数据级别上分开（例如，读取时复制，提交更改回来）。

score 1 · Accepted Answer

如果您确实测量到缓存局部性为您提供了好处，那么我建议使用内存池方法：在最基本的级别上，如果您预先知道最大元素数量，您可以简单地创建三个向量，一个带有对象，一种具有活动对象指针，一种具有自由对象指针。最初，空闲列表具有指向元素容器中所有对象的指针，然后项目在它们变为活动时被移动到活动列表，然后在它们被删除时返回到空闲列表。

即使从相应的容器中添加/删除指针，对象也永远不会改变位置，因此您的引用永远不会失效。

score 1 · Accepted Answer

要动态更改引用的矢量实体，请修改您的设计以将索引存储在 UserObject 中，而不是直接指针。这样，您可以更改引用的向量，复制旧值，然后一切仍然有效。在缓存方面，单个指针的索引可以忽略不计，而在指令方面它是相同的。

要处理删除，要么忽略它们（如果你知道它们的数量是固定的），要么维护一个空闲的索引列表。添加项目时使用此freelist，然后仅在freelist为空时增加向量。

score 0 · Accepted Answer

我心里有两个办法。第一种方法是从容器 http://www.codeproject.com/Articles/328365/Understanding-and-Implementing-Observer-Pattern-in中删除实体时更新您的句柄，第二种方法是使用键/值容器，如 map/hash表和您的句柄必须包含键而不是索引

编辑：

第一个解决方案示例

class Manager:

class Entity { bool alive{true}; };
class EntityHandle 
{
public:
    EntityHandle(Manager *manager)
    {
        manager->subscribe(this);
        // need more code for index
    }

    ~EntityHandle(Manager *manager)
    {
        manager->unsubscribe(this);
    }

    void update(int removedIndex)
    {
        if(removedIndex < index)
        {
            --index;
        }
    }

    int index; 
};

class Manager {
    vector<Entity> entities; // Cache-friendly
    list<EntityHandle*> handles;

    bool needToRemove(const unique_ptr<Entity>& e)
    {
        bool result = !e->alive;

        if(result )
            for(auto handle: handles)
            {
                handle->update(e->index);
            }

        return result;
    }

    void update() 
    {
        entities.erase(remove_if(begin(entities), end(entities),
        needToRemove);
    }
    Entity& getEntity(EntityHandle h) { return entities[h.index]; }

    subscribe(EntityHandle *handle)
    {
        handles.push_back(handle);
    }

    unsubscribe(EntityHandle *handle)
    {
        // find and remove
    }

};

我希望这足以让你明白

score 0 · Accepted Answer

让我们回顾一下你的短语

缓存友好的线性内存。

“线性”的要求是什么？如果您真的有这样的要求，请参考@seano 和@Mark B 的答案。如果您不关心线性内存，那么我们开始吧。

std::map, std::set,std::list提供对容器修改稳定（容忍）的迭代器——这意味着您可以保留迭代器，而不是保留引用：

struct UserObject {
    // This reference may unexpectedly become invalid
    my_container_t::iterator entity;
};

特别说明std::list- 关于http://isocpp.org/上的一些讲座Bjarne Stroustrup 不建议使用链表，但对于您的情况，您可以确定Entity内部Manager不会被修改 - 所以参考适用于那里。

PS 谷歌搜索很快我还没有找到是否unordered_map提供稳定的迭代器，所以我上面的列表可能不完整。

PPS 发布后我记得有趣的数据结构 - 分块列表。线性数组的链表 - 因此您可以按链接顺序保持线性固定大小的块。

c++ - 了解缓存友好、面向数据的对象和句柄

7 回答 7

Related

Reference