我有一个使用 GPU (NVIDA GTX980) 进行图像处理的 C# & .NET 应用程序。有 4 个阶段,我将 CPU 与 GPU 同步(时间不重叠)以进行计时。但数字不加起来。
Launch() 将异步启动 GPU 内核)但 synchronize() 将等到它完成。
- 总计: tThreshold:4.2827ms
- 直方图:3.7714ms
- 直方图总和:0.1065ms
- tIQR:3.8603ms
- tThresholdOnly: 0.4126ms
到底是怎么回事?
public static void threshold()
{
Stopwatch watch = new Stopwatch();
watch.Start();
gpu.Lock();
dim3 block = new dim3(tileWidthBig, tileHeightBig);
dim3 grid = new dim3(Frame.width / tileWidthBig, Frame.height / tileHeightBig);
gpu.Launch(grid, block).gHistogram(gForeground, gPercentile, gInfo);
gpu.Synchronize();
tHistogram = watch.Elapsed.TotalMilliseconds;
block = new dim3(1024);
grid = new dim3(1);
gpu.Launch(grid, block).gSumHistogram(gPercentile);
gpu.Synchronize();
tHistogramSum = watch.Elapsed.TotalMilliseconds - tHistogram;
gpu.Launch(grid, block).gIQR(gPercentile, gInfo);
gpu.Synchronize();
tIQR = watch.Elapsed.TotalMilliseconds - tHistogramSum;
block = new dim3(256, 4);
grid = new dim3(Frame.width / 256, Frame.height / 4);
gpu.Launch(grid, block).gThreshold(gForeground, gMask, gInfo);
gpu.Synchronize();
tThresholdOnly = watch.Elapsed.TotalMilliseconds - tIQR;
gpu.Unlock();
watch.Stop();
tThreshold = watch.Elapsed.TotalMilliseconds;
}