我有一个采用面向数据设计(data-oriented design)的 boids 模拟程序,在没有任何多线程代码时,它以 30 fps 运行。为了提高性能,我决定使用专用线程来更新 boids,但帧率反而从 30 fps 降到了 10 fps。数据是全局的,因此任何线程都可以访问它。
一开始我以为是伪共享(false sharing)的问题,所以我尝试只用一个线程运行更新代码,但与非线程版本相比,我的 fps 仍然很低。值得一提的是,主线程只读取这些数据(用于渲染),从不写入。抱歉代码有点多,我已尽量只提取其中最重要的部分,以便您了解它是如何工作的。
// Structure-of-arrays storage for the boids: each member points to its own
// array instead of the fields being grouped per actor.
// NOTE(review): the struct's braces are missing from this excerpt.
struct SActorsData
// Presumably one color per actor — confirm against the allocation code.
SColor* Color;
// Actor positions; elements are read and written through CActor::ActorIdx.
SVector3* Position;
// Presumably one scale per actor — confirm against the allocation code.
SVector3* Scale;
// Array of mesh pointers; ownership of the meshes is not visible here.
Mesh** MeshPtr;
// Boid-specific arrays stored separately from the general actor data.
// NOTE(review): the struct's braces are missing from this excerpt.
struct SBoidsData
// Boid velocities; indexed with the same actor index as ActorsData.Position
// and allocated in main with new SVector3[...].
SVector3* Velocity;
// Presumably one radius per boid — its use is not visible in this excerpt.
float* Radius;
// Global state shared between the main thread and the worker threads.

// One flag per worker: the main loop sets a flag to true to request an update
// and WorkerUpdate sets it back to false when it is done.
// NOTE(review): these flags are read and written from several threads without
// any synchronization, which is a data race. std::vector<bool> is also a
// packed-bit specialization, so different "elements" can share the same
// byte/word; consider std::atomic<bool> flags or a condition variable.
std::vector<bool> WorkersSemaphore;
// Worker threads created in main, each running WorkerUpdate.
std::vector<std::thread> Workers;
// All actors; its size determines how the work is split between workers.
std::vector<CActor*> Actors;
// Structure-of-arrays data shared by all actors.
SActorsData ActorsData;
// Structure-of-arrays data specific to boids.
SBoidsData BoidsData;
// Handle for one actor: it stores an index into the global structure-of-arrays
// data, which the statement below uses to access that data.
// NOTE(review): braces and the enclosing method signature are missing from
// this excerpt.
struct CActor
// Index of this actor's entries in the ActorsData / BoidsData arrays.
uint32_t ActorIdx;
// Advance this actor's position by its velocity scaled by DeltaTime.
// NOTE(review): presumably the body of an update method whose header is not
// shown — confirm; the origin of DeltaTime is not visible either.
ActorsData.Position[ActorIdx] = ActorsData.Position[ActorIdx] + BoidsData.Velocity[ActorIdx] * DeltaTime;
// Worker-thread entry point (passed to std::thread in main).
// Loops while bIsAppRunning is true; whenever this worker's flag is set it
// walks its actor range and then clears the flag.
//   ThreadIdx      - index of this worker's flag in WorkersSemaphore.
//   ActorsStartIdx - first actor index handled by this worker (inclusive).
//   ActorsEndIdx   - end of the handled range (exclusive).
void WorkerUpdate(uint32_t ThreadIdx, uint32_t ActorsStartIdx, uint32_t ActorsEndIdx)
// NOTE(review): this loop spins without blocking, sleeping or yielding while
// the flag is false, so an idle worker keeps a core fully busy.
while (bIsAppRunning)
// NOTE(review): unsynchronized read of a flag that the main thread writes.
if (WorkersSemaphore[ThreadIdx])
// NOTE(review): the per-actor loop body is not visible in this excerpt;
// presumably it updates actor i — confirm against the full source.
for (uint32_t i = ActorsStartIdx; i < ActorsEndIdx; ++i)
// Tell the main thread that this worker finished the current update.
WorkersSemaphore[ThreadIdx] = false;
// Program entry point: allocates the boid arrays, starts the worker threads,
// then runs the main loop, which triggers the workers and polls until every
// worker has cleared its flag.
// NOTE(review): braces are missing and the function is cut off after the last
// visible line, so only the visible statements are described.
// NOTE(review): standard C++ requires main to return int, not void.
void main()
// Code sample on how I allocate memory for each field
// Extra capacity allocated on top of InitialBoidsCount.
uint16_t BoidsAdditionalBuffer = 100;
// NOTE(review): raw new[] with no matching delete[] visible in this excerpt.
BoidsData.Velocity = new SVector3[InitialBoidsCount + BoidsAdditionalBuffer];
// Create the worker threads
// Actors per worker (integer division).
int32_t ChunkSize = Actors.size() / NumWorkers;
// Actors left over after the even split.
// NOTE(review): Reminder is not used in the visible lines, so the last
// Reminder actors fall outside every worker's range — confirm.
int32_t Reminder = Actors.size() - ChunkSize * NumWorkers;
for (uint32_t i = 0; i < NumWorkers; ++i)
// Worker i handles the half-open range [i * ChunkSize, i * ChunkSize + ChunkSize).
uint32_t ActorsStartIdx = i * ChunkSize;
uint32_t ActorsEndIdx = i * ChunkSize + ChunkSize;
// NOTE(review): std::move on a temporary is redundant here.
Workers.push_back(std::move(std::thread(WorkerUpdate, i, ActorsStartIdx, ActorsEndIdx)));
// Main loop: runs while bIsAppRunning is true.
while (bIsAppRunning)
// Update the semaphores to true so worker threads can update
for (uint32_t i = 0; i < WorkersSemaphore.size(); ++i)
WorkersSemaphore[i] = true;
// Wait until worker threads have completed the update
// NOTE(review): this is a busy-wait that polls flags the workers write without
// synchronization; the main thread spins at full speed while waiting.
while (true)
// Stays true only if every worker's flag has been cleared.
bool bUpdateDone = true;
for (uint32_t i = 0; i < WorkersSemaphore.size(); ++i)
bUpdateDone &= !WorkersSemaphore[i];
// NOTE(review): the statement controlled by this condition is not visible;
// presumably it leaves the wait loop — confirm.
if (bUpdateDone)