我正在开发一个 OpenFX 插件来处理分级/后期制作软件中的图像。
我所有的处理都是在一系列 Metal 内核函数中完成的。图像作为缓冲区(浮点数组)发送到 GPU,一个用于输入,一个用于输出。
然后输出被 OpenFX 框架用于在主机应用程序中显示,所以在此之前我不必照顾它。
GPU 处理完命令后,我现在需要能够读取输出值。我尝试使用应用于缓冲区的“内容”方法,但我的插件不断崩溃(在最坏的情况下),或者当它“工作”时给我非常奇怪的值(我不应该有任何超过 1 和以下的值0,但我得到非常大的数字,0 或负 0,nan...所以我假设我有某种内存访问问题)。
起初我认为这是私有/共享内存的问题,所以我尝试修改要共享的缓冲区。但我还在挣扎!
完全披露:我没有在 MSL 方面接受过专门的培训,我正在学习这个项目,所以我可能会做和-或说非常愚蠢的事情。在决定寻求帮助之前,我已经环顾了几个小时。感谢所有愿意以任何方式提供帮助的人!
下面是代码(没有与我当前问题无关的所有内容)。如果它缺乏任何有趣的东西,请告诉我。
id < MTLBuffer > srcDeviceBuf = reinterpret_cast<id<MTLBuffer> >(const_cast<float*>(p_Input)) ;
//Below is the destination Image buffer creation the way it used to be done before my edits
//id < MTLBuffer > dstDeviceBuf = reinterpret_cast<id<MTLBuffer> >(p_Output);
//My attempt at creating a Shared memory buffer
MTLResourceOptions bufferOptions = MTLResourceStorageModeShared;
int bufferLength = sizeof(float)*1920*1080*4;
id <MTLBuffer> dstDeviceBuf = [device newBufferWithBytes:p_Output length:bufferLength options:bufferOptions];
id<MTLCommandBuffer> commandBuffer = [queue commandBuffer];
commandBuffer.label = [NSString stringWithFormat:@"RunMetalKernel"];
id<MTLComputeCommandEncoder> computeEncoder = [commandBuffer computeCommandEncoder];
//First method to be computed
[computeEncoder setComputePipelineState:_initModule];
int exeWidth = [_initModule threadExecutionWidth];
MTLSize threadGroupCount = MTLSizeMake(exeWidth, 1, 1);
MTLSize threadGroups = MTLSizeMake((p_Width + exeWidth - 1) / exeWidth,
p_Height, 1);
[computeEncoder setBuffer:srcDeviceBuf offset: 0 atIndex: 0];
[computeEncoder setBuffer:dstDeviceBuf offset: 0 atIndex: 8];
//encodes first module to be executed
[computeEncoder dispatchThreadgroups:threadGroups threadsPerThreadgroup: threadGroupCount];
//Modules encoding
if (p_lutexport_on) {
//Fills the image with patch values for the LUT computation
[computeEncoder setComputePipelineState:_LUTExportModule];
[computeEncoder dispatchThreadgroups:threadGroups threadsPerThreadgroup: threadGroupCount];
}
[computeEncoder endEncoding];
[commandBuffer commit];
if (p_lutexport_on) {
//Here is where I try to read the buffer values (and inserts them into a custom object "p_lut_exp_lut"
float* result = static_cast<float*>([dstDeviceBuf contents]);
//Retrieve the output values and populate the LUT with them
int lutLine = 0;
float3 out;
for (int index(0); index < 35937 * 4; index += 4) {
out.x = result[index];
out.y = result[index + 1];
out.z = result[index + 2];
p_lutexp_lut->setValuesAtLine(lutLine, out);
lutLine++;
}
p_lutexp_lut->toFile();
}