c# - 具有本机集合的 Unity 作业系统比具有 C# 集合的作业系统慢

Question

我目前正在开发一个非常消耗 CPU 的模拟，已经迁移到作业系统（Job System）+ Burst 编译，性能因此大幅提升。话虽如此，切换到作业系统又带来了一个新的瓶颈。我想问是否有办法规避这种情况。

我的模拟的工作原理如下：它基本上是在世界空间中移动一个 2D 矩形，并记录路径上经过的网格坐标。不使用作业时，最大的瓶颈是 SampleLine；使用作业系统后，瓶颈变成了 trajectory.Add/.AddNative 和 occupied.IndexOf。

    // Per simulation step we keep one job handle, one single-element timestamp
    // buffer and one list of occupied cells, all at the same index.
    NativeList<JobHandle> pendingJobs = new NativeList<JobHandle>(Allocator.Temp);
    List<NativeArray<float>> stepTimestamps = new List<NativeArray<float>>();
    List<NativeList<int2>> stepCells = new List<NativeList<int2>>();
    while (simulate)
    {
        move(dummy);
        time += timestep;

        NativeArray<float> timestampSlot = new NativeArray<float>(1, Allocator.TempJob);
        NativeList<int2> cells = new NativeList<int2>(Allocator.TempJob);

        // NOTE(review): the first argument is `timestep`, not the accumulated `time`;
        // confirm against GetOccupiedTilesJob that this is the intended timestamp.
        pendingJobs.Add(GetOccupiedTilesJob(timestep, dummy, grid, ref timestampSlot, ref cells));
        stepTimestamps.Add(timestampSlot);
        stepCells.Add(cells);
    }

    // Wait for every scheduled job, then move the results into the trajectory
    // and release the per-step native buffers.
    JobHandle.CompleteAll(pendingJobs);
    for (int step = 0; step < stepTimestamps.Count; step++)
    {
        NativeArray<float> timestampSlot = stepTimestamps[step];
        NativeList<int2> cells = stepCells[step];

        // BOTTLENECK HERE
        // TODO: once unity supports it, use cells.ToList()
        // Managed alternative (copy into a List<int2>, then trajectory.Add):
        /*List<int2> coordinates = new List<int2>();
        for(int j = 0; j < cells.Length; j++)
        {
            coordinates.Add(cells[j]);
        }
        trajectory.Add(timestampSlot[0], coordinates);*/
        trajectory.AddNative(timestampSlot[0], cells);

        timestampSlot.Dispose();
        cells.Dispose();
    }

    pendingJobs.Dispose();
    return trajectory;

trajectory.Add 使用 Dictionary<int2, float2> 来存储值，而 .AddNative 使用 NativeHashMap<int2, float2>。trajectory.Add 的速度几乎是 AddNative 的两倍，即使我必须先把 NativeList 的所有值复制到一个多余的 List 中。如果改用数组（Array）作为缓冲区，我也许还能让它更快。

这是所使用的作业（Job）：

/// <summary>
/// Job that collects the grid cells touched by the quadrilateral A-B-C-D and appends
/// them to <see cref="result"/>, adding a cell only if the list does not already hold it.
/// </summary>
/// <remarks>
/// The corners are converted first, then the four edges are sampled, and finally the
/// interior is swept with lines running from edge AB to edge DC. Every sampling step
/// is <c>cellSize / 4</c>. The job also copies <see cref="timestamp"/> into
/// <see cref="timestampArr"/> so the caller can read it back after completion.
/// </remarks>
[BurstCompile]
public struct GetOccupiedTiles : IJob
{
    // Corners of the quadrilateral in world space; edges are A-B, B-C, C-D and D-A.
    public float2 A;
    public float2 B;
    public float2 C;
    public float2 D;

    public float timestamp;
    public NativeArray<float> timestampArr; // only for storage
    public NativeList<int2> result;

    [ReadOnly] public float2 gridCenter;
    [ReadOnly] public float cellSize;

    /// <summary>
    /// Fills <see cref="result"/> with the cells covered by the quadrilateral and
    /// writes <see cref="timestamp"/> to element 0 of <see cref="timestampArr"/>.
    /// </summary>
    public void Execute()
    {
        // Written first so the timestamp is available even when sampling is skipped.
        timestampArr[0] = timestamp;

        float rate = cellSize / 4f;

        // With a step that is zero, negative or NaN the sampling loops below would
        // never advance (and WorldToCell would divide by a meaningless cell size),
        // so no cells are reported in that case.
        if (!(rate > 0f))
        {
            return;
        }

        // Corners go through the same duplicate check as sampled points; otherwise
        // corners that share a cell would be stored more than once.
        AddUnique(ref result, WorldToCell(A, gridCenter, cellSize));
        AddUnique(ref result, WorldToCell(B, gridCenter, cellSize));
        AddUnique(ref result, WorldToCell(C, gridCenter, cellSize));
        AddUnique(ref result, WorldToCell(D, gridCenter, cellSize));

        // Outline.
        SampleLine(A, B, rate, ref result, gridCenter, cellSize);
        SampleLine(B, C, rate, ref result, gridCenter, cellSize);
        SampleLine(C, D, rate, ref result, gridCenter, cellSize);
        SampleLine(D, A, rate, ref result, gridCenter, cellSize);

        // Interior: walk along AB and DC in lock-step and sample the line between
        // the two moving points. The walk is bounded by |AB| only.
        // NOTE(review): presumably AB and DC have equal length (a rectangle) - confirm.
        float2 AB = B - A;
        float2 AB_normalized = math.normalize(AB);
        float2 DC_normalized = math.normalize(C - D);
        float length = math.length(AB);
        float current = rate;
        while (current < length)
        {
            float2 X = A + AB_normalized * current;
            float2 Y = D + DC_normalized * current;

            SampleLine(X, Y, rate, ref result, gridCenter, cellSize);
            current += rate;
        }
    }

    /// <summary>
    /// Samples the segment from <paramref name="A"/> towards <paramref name="B"/> every
    /// <paramref name="sampleRate"/> world units (end points excluded) and adds each
    /// sampled cell to <paramref name="occupied"/> if it is not already present.
    /// </summary>
    /// <param name="A">Segment start in world space.</param>
    /// <param name="B">Segment end in world space.</param>
    /// <param name="sampleRate">Distance between consecutive samples.</param>
    /// <param name="occupied">List that receives the newly found cells.</param>
    /// <param name="gridCenter">Grid origin passed on to <see cref="WorldToCell"/>.</param>
    /// <param name="cellSize">Cell edge length passed on to <see cref="WorldToCell"/>.</param>
    private static void SampleLine(float2 A, float2 B, float sampleRate, ref NativeList<int2> occupied, float2 gridCenter, float cellSize)
    {
        float2 AB = B - A;
        float length = math.length(AB);

        // Segment shorter than one step: nothing to sample, and this also keeps
        // math.normalize away from a zero-length vector.
        if (!(sampleRate < length))
        {
            return;
        }

        float2 AB_scaled = math.normalize(AB) * sampleRate;
        float2 test = A + AB_scaled;
        float current = sampleRate;

        while (current < length)
        {
            AddUnique(ref occupied, WorldToCell(test, gridCenter, cellSize));
            test += AB_scaled;
            current += sampleRate;
        }
    }

    /// <summary>
    /// Appends <paramref name="cell"/> to <paramref name="occupied"/> unless the list
    /// already contains it.
    /// </summary>
    private static void AddUnique(ref NativeList<int2> occupied, int2 cell)
    {
        // optimally i want to use a hashset here because .IndexOf is very slow
        if (occupied.IndexOf(cell) == -1)
        {
            occupied.Add(cell);
        }
    }

    /// <summary>
    /// Converts a world-space position to integer cell coordinates: the offset from
    /// <paramref name="gridCenter"/> divided by <paramref name="cellSize"/>, floored.
    /// </summary>
    /// <param name="vector">World-space position.</param>
    /// <param name="gridCenter">World-space position that maps to cell (0, 0).</param>
    /// <param name="cellSize">Edge length of one cell.</param>
    /// <returns>The cell coordinates containing <paramref name="vector"/>.</returns>
    public static int2 WorldToCell(float2 vector, float2 gridCenter, float cellSize)
    {
        return (int2)math.floor(-(gridCenter - vector) / cellSize);
    }
}

现在我的问题是：如何避免代价高昂的 NativeList 到 List 的转换？为什么把一个 NativeList 添加到 NativeHashMap，比先把 NativeList 转换为 List、再把该 List 添加到 Dictionary 还要慢？

另外，把这个作业拆分成更小的作业会不会更好？这样我就可以并行调用 SampleLine 甚至 WorldToCell。但我不知道作业能否启动其他作业，或者我是否应该只把 WorldToCell 做成一个作业（因为它被调用得最多），其余部分用普通 C# 完成？我对作业系统完全陌生，所以不清楚一个“作业”的粒度应该如何划分。我可以把它保留为一个大作业，也可以做很多小的 WorldToCell 作业，或者把这三个计算各自做成一个作业。

c# - 具有本机集合的 Unity 作业系统比具有 C# 集合的作业系统慢

0 回答 0

Related

Reference