c++ - Windows 中的多线程伪共享（false sharing）

Question

嗨，我遇到了一个问题：测试代码中并没有出现我预期的伪共享（false sharing）。

我正在尝试创建一个进程唯一的线程管理器，它可以均匀地管理多个线程。

这个唯一的线程管理器类不是线程池：它把任务函数分配给指定的线程来执行，并且能够取得任务函数的返回值，而不是只把任务推入队列后就不再过问。此外，线程管理器不关心任务的大小（计算量）。

线程管理器将由一个线程（主线程）用于处理计算部分，并且会被非常频繁地使用。这样做的原因是，我的进程将采用游戏循环设计模式，并且我想让游戏循环超过 120 FPS，这意味着必须在 8.3 毫秒内完成 1 次游戏循环。主线程可能会在 1 次游戏循环内多次分配任务，因此减少/消除上下文切换成本是我最关注的问题。我的结论是让线程管理器的线程以自旋（忙等待）的方式运行。

简而言之，游戏循环将按照两个步骤进行多次迭代。

1. 主循环将任务分配给线程管理器。
2. 等待线程管理器的任务结果。

下面是我的测试代码。

ThreadManager.h

namespace YSLibrary
{
    class CThreadManager final
    {
    private:

        static long long s_llLock;

        static unsigned long long s_ullThreadCount;
        static void** s_ppThreads;
        static unsigned long* s_pThreadIDs;
        static long long* s_pThreadQuits;

        static long long* s_pTaskLocks;
        static unsigned long long (**s_ppTasks)();
        static unsigned long long* s_pTaskResults;

        CThreadManager(){}
        ~CThreadManager(){}

        __forceinline static void Lock()
        {
            while (true)
            {
                if (InterlockedCompareExchange64(&s_llLock, 1LL, 0LL) == 0LL)
                {
                    return;
                }

                Sleep(0UL);
            }
        }

        __forceinline static void Unlock()
        {
            InterlockedExchange64(&s_llLock, 0LL);
        }

        static unsigned long __stdcall Thread(void* const _pParameter)
        {
            const unsigned long long ullThreadIndex = reinterpret_cast<const unsigned long long>(_pParameter);

            while (true)
            {
                if (InterlockedCompareExchange64(&s_pThreadQuits[ullThreadIndex], 0LL, 1LL) == 1LL)
                {
                    return 1UL;
                }

                if (InterlockedCompareExchange64(&s_pTaskLocks[ullThreadIndex], 1LL, 0LL) == 0LL)
                {
                    if (s_ppTasks[ullThreadIndex] != nullptr)
                    {
                        s_pTaskResults[ullThreadIndex] = s_ppTasks[ullThreadIndex]();
                        s_ppTasks[ullThreadIndex] = nullptr;
                    }

                    InterlockedExchange64(&s_pTaskLocks[ullThreadIndex], 0LL);
                }
            }
        }

    public:

        enum class EResult : unsigned long long
        {
            None = 0ULL,
            Success = 1ULL,
            Fail_ArgumentNull = 2ULL,
            Fail_ArgumentInvalid = 3ULL,
            Fail_Locked = 4ULL,
            Fail_ThreadCountNotZero = 5ULL,
            Fail_ThreadCountZero = 6ULL,
            Fail_ThreadsNotNull = 7ULL,
            Fail_ThreadsNull = 8ULL,
            Fail_ThreadIDsNotNull = 9ULL,
            Fail_ThreadIDsNull = 10ULL,
            Fail_ThreadQuitsNotNull = 11ULL,
            Fail_ThreadQuitsNull = 12ULL,
            Fail_TaskLocksNotNull = 13ULL,
            Fail_TaskLocksNull = 14ULL,
            Fail_TasksNotNull = 15ULL,
            Fail_TasksNull = 16ULL,
            Fail_TaskResultsNotNull = 17ULL,
            Fail_TaskResultsNull = 18ULL,
            Fail_CreateThread = 19ULL
        };

        __forceinline static EResult Initialize(const unsigned long long _ullThreadCount)
        {
            if (_ullThreadCount == 0ULL)
            {
                return EResult::Fail_ArgumentNull;
            }

            Lock();

            if (s_ullThreadCount != 0ULL)
            {
                Unlock();
                return EResult::Fail_ThreadCountNotZero;
            }

            if (s_ppThreads != nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadsNotNull;
            }

            if (s_pThreadIDs != nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadIDsNotNull;
            }

            if (s_pThreadQuits != nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadQuitsNotNull;
            }

            if (s_pTaskLocks != nullptr)
            {
                Unlock();
                return EResult::Fail_TaskLocksNotNull;
            }

            if (s_ppTasks != nullptr)
            {
                Unlock();
                return EResult::Fail_TasksNotNull;
            }

            if (s_pTaskResults != nullptr)
            {
                Unlock();
                return EResult::Fail_TaskResultsNotNull;
            }

            s_ullThreadCount = _ullThreadCount;
            s_ppThreads = new void*[s_ullThreadCount]{};
            s_pThreadIDs = new unsigned long[s_ullThreadCount]{};
            s_pThreadQuits = new long long[s_ullThreadCount]{};

            s_pTaskLocks = new long long[s_ullThreadCount]{};
            s_ppTasks = new (unsigned long long (*[s_ullThreadCount])()){};
            s_pTaskResults = new unsigned long long[s_ullThreadCount]{};

            for (unsigned long long i = 0ULL; i < s_ullThreadCount; ++i)
            {
                s_ppThreads[i] = CreateThread(nullptr, 0ULL, &Thread, reinterpret_cast<void*>(i), 0UL, &s_pThreadIDs[i]);
                if (s_ppThreads[i] == nullptr)
                {
                    // Rollback
                    for (unsigned long long j = 0ULL; j < i; ++j)
                    {
                        InterlockedExchange64(&s_pThreadQuits[i], 1LL);
                    }

                    unsigned long ulExitCode = 0UL;
                    for (unsigned long long j = 0ULL; j < i; ++j)
                    {
                        while (true)
                        {
                            GetExitCodeThread(s_ppThreads[j], &ulExitCode);
                            if (ulExitCode != static_cast<unsigned long>(STILL_ACTIVE))
                            {
                                CloseHandle(s_ppThreads[j]);
                                s_ppThreads[j] = nullptr;
                                break;
                            }

                            Sleep(0UL);
                        }
                    }

                    delete[] s_pTaskResults;
                    s_pTaskResults = nullptr;

                    delete[] s_ppTasks;
                    s_ppTasks = nullptr;

                    delete[] s_pTaskLocks;
                    s_pTaskLocks = nullptr;

                    delete[] s_pThreadQuits;
                    s_pThreadQuits = nullptr;

                    delete[] s_pThreadIDs;
                    s_pThreadIDs = nullptr;

                    delete[] s_ppThreads;
                    s_ppThreads = nullptr;

                    s_ullThreadCount = 0ULL;

                    Unlock();
                    return EResult::Fail_CreateThread;
                }
            }

            Unlock();
            return EResult::Success;
        }

        __forceinline static EResult Terminate()
        {
            Lock();

            if (s_ullThreadCount == 0ULL)
            {
                Unlock();
                return EResult::Fail_ThreadCountZero;
            }

            if (s_ppThreads == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadsNull;
            }

            if (s_pThreadIDs == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadIDsNull;
            }

            if (s_pThreadQuits == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadQuitsNull;
            }

            if (s_pTaskLocks == nullptr)
            {
                Unlock();
                return EResult::Fail_TaskLocksNull;
            }

            if (s_ppTasks == nullptr)
            {
                Unlock();
                return EResult::Fail_TasksNull;
            }

            if (s_pTaskResults == nullptr)
            {
                Unlock();
                return EResult::Fail_TaskResultsNull;
            }

            for (unsigned long long i = 0ULL; i < s_ullThreadCount; ++i)
            {
                InterlockedExchange64(&s_pThreadQuits[i], 1LL);
            }

            unsigned long ulExitCode = 0UL;
            for (unsigned long long i = 0ULL; i < s_ullThreadCount; ++i)
            {
                while (true)
                {
                    GetExitCodeThread(s_ppThreads[i], &ulExitCode);
                    if (ulExitCode != static_cast<unsigned long>(STILL_ACTIVE))
                    {
                        CloseHandle(s_ppThreads[i]);
                        s_ppThreads[i] = nullptr;
                        break;
                    }

                    Sleep(0UL);
                }
            }

            delete[] s_pTaskResults;
            s_pTaskResults = nullptr;

            delete[] s_ppTasks;
            s_ppTasks = nullptr;

            delete[] s_pTaskLocks;
            s_pTaskLocks = nullptr;

            delete[] s_pThreadQuits;
            s_pThreadQuits = nullptr;

            delete[] s_pThreadIDs;
            s_pThreadIDs = nullptr;

            delete[] s_ppThreads;
            s_ppThreads = nullptr;

            s_ullThreadCount = 0ULL;

            Unlock();
            return EResult::Success;
        }

        __forceinline static EResult Execute(const unsigned long long _ullThreadIndex, unsigned long long (*_pFunction)())
        {
            if (_pFunction == nullptr)
            {
                return EResult::Fail_ArgumentNull;
            }

            Lock();

            if (s_ullThreadCount == 0ULL)
            {
                Unlock();
                return EResult::Fail_ThreadCountZero;
            }

            if (s_ppThreads == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadsNull;
            }

            if (s_pThreadIDs == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadIDsNull;
            }

            if (s_pThreadQuits == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadQuitsNull;
            }

            if (s_pTaskLocks == nullptr)
            {
                Unlock();
                return EResult::Fail_TaskLocksNull;
            }

            if (s_ppTasks == nullptr)
            {
                Unlock();
                return EResult::Fail_TasksNull;
            }

            if (s_pTaskResults == nullptr)
            {
                Unlock();
                return EResult::Fail_TaskResultsNull;
            }

            if (_ullThreadIndex >= s_ullThreadCount)
            {
                Unlock();
                return EResult::Fail_ArgumentInvalid;
            }

            while (true)
            {
                if (InterlockedCompareExchange64(&s_pTaskLocks[_ullThreadIndex], 1LL, 0LL) == 0LL)
                {
                    s_ppTasks[_ullThreadIndex] = _pFunction;

                    InterlockedExchange64(&s_pTaskLocks[_ullThreadIndex], 0LL);
                    Unlock();
                    return EResult::Success;
                }

                Sleep(0UL);
            }
        }

        __forceinline static EResult WaitForResult(const unsigned long long _ullThreadIndex, unsigned long long* const _pFunctionResult)
        {
            if (_pFunctionResult == nullptr)
            {
                return EResult::Fail_ArgumentNull;
            }

            Lock();

            if (s_ullThreadCount == 0ULL)
            {
                Unlock();
                return EResult::Fail_ThreadCountZero;
            }

            if (s_ppThreads == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadsNull;
            }

            if (s_pThreadIDs == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadIDsNull;
            }

            if (s_pThreadQuits == nullptr)
            {
                Unlock();
                return EResult::Fail_ThreadQuitsNull;
            }

            if (s_pTaskLocks == nullptr)
            {
                Unlock();
                return EResult::Fail_TaskLocksNull;
            }

            if (s_ppTasks == nullptr)
            {
                Unlock();
                return EResult::Fail_TasksNull;
            }

            if (s_pTaskResults == nullptr)
            {
                Unlock();
                return EResult::Fail_TaskResultsNull;
            }

            if (_ullThreadIndex >= s_ullThreadCount)
            {
                Unlock();
                return EResult::Fail_ArgumentInvalid;
            }

            while (true)
            {
                if (InterlockedCompareExchange64(&s_pTaskLocks[_ullThreadIndex], 1LL, 0LL) == 0LL)
                {
                    if (s_ppTasks[_ullThreadIndex] == nullptr)
                    {
                        (*_pFunctionResult) = s_pTaskResults[_ullThreadIndex];

                        InterlockedExchange64(&s_pTaskLocks[_ullThreadIndex], 0LL);
                        Unlock();
                        return EResult::Success;
                    }

                    InterlockedExchange64(&s_pTaskLocks[_ullThreadIndex], 0LL);
                }

                Sleep(0UL);
            }
        }
    };
}

主文件

#include <cstdlib>
#include <ctime>
#include <iostream>

#include <Windows.h>

#include "ThreadManager.h"

// Definitions of the static data members declared in YSLibrary::CThreadManager.
// Everything starts out as 0 / nullptr, which is the state Initialize() checks for
// before it allocates the arrays and starts the worker threads.
long long YSLibrary::CThreadManager::s_llLock = 0LL;
unsigned long long YSLibrary::CThreadManager::s_ullThreadCount = 0ULL;
void** YSLibrary::CThreadManager::s_ppThreads = nullptr;
unsigned long* YSLibrary::CThreadManager::s_pThreadIDs = nullptr;
long long* YSLibrary::CThreadManager::s_pThreadQuits = nullptr;
long long* YSLibrary::CThreadManager::s_pTaskLocks = nullptr;
unsigned long long (**YSLibrary::CThreadManager::s_ppTasks)() = nullptr;
unsigned long long* YSLibrary::CThreadManager::s_pTaskResults = nullptr;

// Receives the task results fetched by main() through WaitForResult(); main() uses
// entries 0..6.
unsigned long long g_pResults[10]{};

// Payload written by the benchmark functions. Only ullData[0] is ever written; the
// remaining elements make the struct eight unsigned long long wide (64 bytes when
// unsigned long long is 8 bytes).
// NOTE(review): the struct carries no alignas, so its size is fixed but its start address
// is not forced onto a 64-byte boundary.
struct SData
{
    unsigned long long ullData[8];
};

// Written by Function(), the single-threaded baseline.
SData g_stData{};

// g_stDataN is written by FunctionN(), one object per worker task.
SData g_stData0{};
SData g_stData1{};
SData g_stData2{};
SData g_stData3{};
SData g_stData4{};
SData g_stData5{};
SData g_stData6{};

// Single-threaded baseline workload: stores a fresh rand() value into
// g_stData.ullData[0] 70,000,000 times and returns 1.
unsigned long long Function()
{
    constexpr unsigned long long ullIterations = 70000000ULL;

    unsigned long long ullRemaining = ullIterations;
    while (ullRemaining != 0ULL)
    {
        const int nRandom = rand();
        g_stData.ullData[0] = static_cast<unsigned long long>(nRandom);
        --ullRemaining;
    }

    return 1ULL;
}

// Worker task 0: stores a fresh rand() value into g_stData0.ullData[0]
// 10,000,000 times and returns 1.
unsigned long long Function0()
{
    constexpr unsigned long long ullIterations = 10000000ULL;

    unsigned long long ullRemaining = ullIterations;
    while (ullRemaining != 0ULL)
    {
        const int nRandom = rand();
        g_stData0.ullData[0] = static_cast<unsigned long long>(nRandom);
        --ullRemaining;
    }

    return 1ULL;
}

// Worker task 1: stores a fresh rand() value into g_stData1.ullData[0]
// 10,000,000 times and returns 1.
unsigned long long Function1()
{
    constexpr unsigned long long ullIterations = 10000000ULL;

    unsigned long long ullRemaining = ullIterations;
    while (ullRemaining != 0ULL)
    {
        const int nRandom = rand();
        g_stData1.ullData[0] = static_cast<unsigned long long>(nRandom);
        --ullRemaining;
    }

    return 1ULL;
}

// Worker task 2: stores a fresh rand() value into g_stData2.ullData[0]
// 10,000,000 times and returns 1.
unsigned long long Function2()
{
    constexpr unsigned long long ullIterations = 10000000ULL;

    unsigned long long ullRemaining = ullIterations;
    while (ullRemaining != 0ULL)
    {
        const int nRandom = rand();
        g_stData2.ullData[0] = static_cast<unsigned long long>(nRandom);
        --ullRemaining;
    }

    return 1ULL;
}

// Worker task 3: stores a fresh rand() value into g_stData3.ullData[0]
// 10,000,000 times and returns 1.
unsigned long long Function3()
{
    constexpr unsigned long long ullIterations = 10000000ULL;

    unsigned long long ullRemaining = ullIterations;
    while (ullRemaining != 0ULL)
    {
        const int nRandom = rand();
        g_stData3.ullData[0] = static_cast<unsigned long long>(nRandom);
        --ullRemaining;
    }

    return 1ULL;
}

// Worker task 4: stores a fresh rand() value into g_stData4.ullData[0]
// 10,000,000 times and returns 1.
unsigned long long Function4()
{
    constexpr unsigned long long ullIterations = 10000000ULL;

    unsigned long long ullRemaining = ullIterations;
    while (ullRemaining != 0ULL)
    {
        const int nRandom = rand();
        g_stData4.ullData[0] = static_cast<unsigned long long>(nRandom);
        --ullRemaining;
    }

    return 1ULL;
}

// Worker task 5: stores a fresh rand() value into g_stData5.ullData[0]
// 10,000,000 times and returns 1.
unsigned long long Function5()
{
    constexpr unsigned long long ullIterations = 10000000ULL;

    unsigned long long ullRemaining = ullIterations;
    while (ullRemaining != 0ULL)
    {
        const int nRandom = rand();
        g_stData5.ullData[0] = static_cast<unsigned long long>(nRandom);
        --ullRemaining;
    }

    return 1ULL;
}

// Worker task 6: stores a fresh rand() value into g_stData6.ullData[0]
// 10,000,000 times and returns 1.
unsigned long long Function6()
{
    constexpr unsigned long long ullIterations = 10000000ULL;

    unsigned long long ullRemaining = ullIterations;
    while (ullRemaining != 0ULL)
    {
        const int nRandom = rand();
        g_stData6.ullData[0] = static_cast<unsigned long long>(nRandom);
        --ullRemaining;
    }

    return 1ULL;
}

int main()
{
    unsigned long long ullStartTick = 0ULL;
    unsigned long long ullEndTick = 0ULL;

    srand((unsigned int)time(nullptr));

    ullStartTick = GetTickCount64();

    Function();

    ullEndTick = GetTickCount64();

    std::wcout << L"[Main]" << std::endl;
    std::wcout << ullEndTick - ullStartTick << std::endl;

    YSLibrary::CThreadManager::EResult eResult = YSLibrary::CThreadManager::EResult::None;

    eResult = YSLibrary::CThreadManager::Initialize(7ULL);

    ullStartTick = GetTickCount64();

    eResult = YSLibrary::CThreadManager::Execute(0ULL, &Function0);
    eResult = YSLibrary::CThreadManager::Execute(1ULL, &Function1);
    eResult = YSLibrary::CThreadManager::Execute(2ULL, &Function2);
    eResult = YSLibrary::CThreadManager::Execute(3ULL, &Function3);
    eResult = YSLibrary::CThreadManager::Execute(4ULL, &Function4);
    eResult = YSLibrary::CThreadManager::Execute(5ULL, &Function5);
    eResult = YSLibrary::CThreadManager::Execute(6ULL, &Function6);
    eResult = YSLibrary::CThreadManager::WaitForResult(0ULL, &g_pResults[0]);
    eResult = YSLibrary::CThreadManager::WaitForResult(1ULL, &g_pResults[1]);
    eResult = YSLibrary::CThreadManager::WaitForResult(2ULL, &g_pResults[2]);
    eResult = YSLibrary::CThreadManager::WaitForResult(3ULL, &g_pResults[3]);
    eResult = YSLibrary::CThreadManager::WaitForResult(4ULL, &g_pResults[4]);
    eResult = YSLibrary::CThreadManager::WaitForResult(5ULL, &g_pResults[5]);
    eResult = YSLibrary::CThreadManager::WaitForResult(6ULL, &g_pResults[6]);

    ullEndTick = GetTickCount64();

    std::wcout << L"[Thread Manager]" << std::endl;
    std::wcout << ullEndTick - ullStartTick << std::endl;

    YSLibrary::CThreadManager::Terminate();

    system("pause");

    return 0;
}

对于 Interlocked 系列函数、__forceinline、静态变量的脏声明等，我感到非常抱歉。

另一方面，我使用“long long”作为锁变量的类型，是因为没有“bool”类型可用。我本来想尝试“short”，但在测量“short”和“long long”的耗时后，发现两者没有显著差异。相反，“short”还稍微慢了一点，我猜原因是在 64 位环境中使用了 16 位寄存器。此外，bool 或 short 类型可能会导致内存对齐问题。所以我使用了“long long”类型。

CThreadManager 之所以有私有构造函数，是为了明确禁止“new CThreadManager()”。

我尽量减少了“reinterpret_cast”的使用。我原以为它只有编译期开销，但我在 Stack Overflow 上看到一个问题说它有运行时开销，对此我还不确定。所以只在线程函数开始时使用它一次。

到目前为止，我已经通过如下更改确认了伪共享（false sharing）现象：

SData::ullData[8] -> SData::ullData[1]

此外，使用 Sleep(0) 显著减少了 WaitForResult() 中线程时间片的浪费，并减少了线程内的总执行时间。

这段代码的结果显示

[Main]
1828
[Thread Manager]
344

在我的环境中。

但是，我刚刚意识到，除了 SData::ullData 之外，还有其他地方肯定也会发生伪共享（false sharing），即 s_pThreadQuits、s_pTaskLocks、s_ppTasks、s_pTaskResults。

为什么这些变量不会发生错误共享？

[编辑]

我所说的“伪共享（false sharing）”是指“被不同线程访问、却位于同一缓存行上的内存地址”，具体是：

1. SData g_stDataN（在每个 FunctionN() 中）
2. s_pThreadQuits、s_pTaskLocks、s_pTaskResults、s_ppTasks（在 Thread() 中）

我认为第 2 项中的变量也会像 g_stDataN 一样被加载到缓存中（在我的环境中缓存行为 64 字节）。我将 SData 的大小设置为 64 字节，是为了用“填充”的方法来避免伪共享。

但是，s_pThreadQuits 的每个元素既不是 64 字节，也没有做填充，因此它同样应该出现伪共享。

就像下面这张图片。

图片来源：https://www.codeproject.com/Articles/85356/Avoiding-and-Identifying-False-Sharing-Among-Threa

c++ - Windows 中的多线程伪共享（false sharing）

0 回答 0

Related

Reference