caching - 虚假共享和缓存对齐

Question

我在 C++ 中有以下代码（稍后解释）：

#include <stdio.h>
#include <string>
#include <vector>

using namespace std;

// Bundle of the values one thread keeps for one resistor. At the moment it
// holds a single double, so consecutive records in an array sit
// sizeof(double) bytes apart.
struct th_private
{
    double mean_tau;

    // Every record starts out with mean_tau == 0.
    th_private() : mean_tau(0) {}
};

// One resistor of the mesh. It carries the per-thread storage for two of the
// candidate layouts side by side so their addresses can be compared.
class resistor
{
public:
    std::string name;

    /*
     * Approach 0: within each resistor, keep one array per "thread private"
     * quantity, indexed by thread id: thread 0 uses mean_tau[0], thread 1
     * uses mean_tau[1], and so on (the same would apply to any further
     * per-thread quantity such as an offset). The author's understanding is
     * that this is not a good approach and leads to a lot of false sharing.
     */
    std::vector<double> mean_tau;

    /*
     * Approach 1: within each resistor, keep a 1D array of th_private
     * records, where state[0] is used ONLY by thread 0, state[1] is used
     * ONLY by thread 1, and so on. This could potentially eliminate false
     * sharing, but the open question is how to make sure the records are
     * aligned with respect to the cache.
     */
    std::vector<th_private> state;

    // A new resistor has an empty name; both per-thread arrays stay empty
    // until prepare_for_threads() is called.
    resistor() : name("") {}

    ~resistor() {}

    // Resizes the storage of BOTH approaches to num_threads entries.
    // New mean_tau entries are value-initialised doubles; new state entries
    // are built by th_private's default constructor.
    void prepare_for_threads( int num_threads )
    {
        mean_tau.resize(num_threads);   // Approach 0 storage
        state.resize(num_threads);      // Approach 1 storage
    }
};

class mesh
{
    public:
    vector<resistor*> R;

    mesh( int num_resistors, int num_threads )
    {
        for( int i = 0; i < num_resistors; i++ )
        {
            resistor *r = new resistor();
            r->prepare_for_threads( num_threads );
            R.push_back(r);
        }   
    }

    ~mesh(){}
};

/*****************************************************************************
 Approach 2: Declare a global 2D matrix, where each row belongs to a
 thread and each column belongs to a resistor. The author considers this
 the most promising of the three approaches.

             R[0]   R[1]    R[2]    R[3]    R[4]        R[9]

    thread0: [0][0] [0][1]  [0][2]  [0][3]  [0][4]  ..  [0][9]  
    ...
    thread3: [3][0] [3][1]  [3][2]  [3][3]  [3][4]  ..  [3][9]  

 The dimensions are hard-coded: 4 rows (threads) by 10 columns (resistors).
 aligned(0x1000) requests 4096-byte alignment for the array object as a
 whole; the elements inside it are still contiguous, sizeof(th_private)
 bytes apart.
 NOTE(review): a row is 10 * sizeof(th_private) bytes, so unless that is a
 multiple of the cache-line size the end of one thread's row and the start
 of the next thread's row can presumably share a line -- confirm against
 the target's line size.
 *****************************************************************************/
th_private __attribute__((aligned(0x1000))) global_state[4][10];

// Builds a mesh of 10 resistors prepared for 4 threads, then prints the size
// of th_private and the addresses of the first four per-thread slots under
// Approach 1 (resistor 0's state array) and of the first four elements of
// row 0 of global_state (Approach 2), so the spacing between neighbouring
// elements can be read off the output.
int main( int argc, char** argv )
{
    // Assume that there are 4 threads declared.

    mesh grid(10, 4);

    // sizeof yields a size_t, which must be printed with %zu; passing it to
    // %d is undefined behaviour where size_t is wider than int.
    printf("sizeof(th_private): %zu\n", sizeof(th_private));

    // %p requires a void* argument, so each address is converted explicitly.
    printf("Approach 1: %p %p %p %p\n",
           static_cast<void*>(&grid.R[0]->state[0]),
           static_cast<void*>(&grid.R[0]->state[1]),
           static_cast<void*>(&grid.R[0]->state[2]),
           static_cast<void*>(&grid.R[0]->state[3]));
    printf("Approach 2: %p %p %p %p\n",
           static_cast<void*>(&global_state[0][0]),
           static_cast<void*>(&global_state[0][1]),
           static_cast<void*>(&global_state[0][2]),
           static_cast<void*>(&global_state[0][3]));
}

在 64 位 linux 机器上的输出是：

sizeof(th_private): 8
Approach 1: 0x658080 0x658088 0x658090 0x658098
Approach 2: 0x608000 0x608008 0x608010 0x608018

每个电阻器都有一组属性，这些属性将由线程修改（读取和写入）。理想情况下，它们可以被视为线程私有变量。但是，由于旧代码库施加的一些限制，我只能采用以下三种方法之一：

方法 0：在每个电阻器结构中，声明“线程私有”变量的数组。线程 0 将使用 mean_tau[0]、offset[0]……，线程 1 将使用 mean_tau[1]、offset[1]……，依此类推。据我了解，这不是一个好方法，会导致大量虚假共享（false sharing）。
方法 1：在电阻器的每个实例中放置一个 struct th_private 的一维数组，其中 state[0] 仅由线程 0 使用，state[1] 仅由线程 1 使用，依此类推。这样可能会消除虚假共享，但如何确保它按缓存行对齐？
方法 2：声明一个全局 2D 矩阵，其中每一行属于一个线程，每一列属于一个电阻器（代码中的更多详细信息）。

现在的问题是：就 i) 避免虚假共享和 ii) 缓存对齐这两点而言，哪种方法最好？

caching - 虚假共享和缓存对齐

0 回答 0

Related

Reference