我有这样一段 OpenCL 代码:
/*
 * Unrolled minimum search over the eight candidates Sum[0]..Sum[7].
 *
 * Each candidate is compared in turn against the running minimum Best.
 * When a candidate is strictly smaller, three things are updated:
 *   - Best  takes the candidate's value,
 *   - iBest takes the candidate's 1-based position (array index + 1),
 *   - the matching Bits[] entry is written through the aBits pointer.
 *
 * Because the comparison is strict (<), a later candidate that merely ties
 * the current Best does not replace it; the earliest minimum wins.
 * All array indices are compile-time constants in this form.
 * Note that *aBits is stored once per taken branch, so it may be written
 * up to eight times.
 */
if (Sum[0] < Best)
{
Best = Sum[0];
iBest = 1;
*aBits = Bits[0];
}
if (Sum[1] < Best)
{
Best = Sum[1];
iBest = 2;
*aBits = Bits[1];
}
if (Sum[2] < Best)
{
Best = Sum[2];
iBest = 3;
*aBits = Bits[2];
}
if (Sum[3] < Best)
{
Best = Sum[3];
iBest = 4;
*aBits = Bits[3];
}
if (Sum[4] < Best)
{
Best = Sum[4];
iBest = 5;
*aBits = Bits[4];
}
if (Sum[5] < Best)
{
Best = Sum[5];
iBest = 6;
*aBits = Bits[5];
}
if (Sum[6] < Best)
{
Best = Sum[6];
iBest = 7;
*aBits = Bits[6];
}
if (Sum[7] < Best)
{
Best = Sum[7];
iBest = 8;
*aBits = Bits[7];
}
为了精简逻辑,我把代码重写成了下面这样:
/*
 * Loop form of the minimum search over Sum[0]..Sum[7].
 *
 * Step 1: scan candidates 1..7 and remember in `index` the position of the
 * smallest Sum entry seen so far. The comparison is strict (<), so on a tie
 * the earlier position is kept.
 * NOTE(review): `index` is not initialized in this fragment; the loop
 * starting at i = 1 suggests it is expected to be 0 on entry -- confirm.
 * NOTE(review): Sum[] and Bits[] are indexed by runtime values (i, index)
 * here rather than by constants; whether the compiler unrolls the loop and
 * resolves these to direct accesses is not visible from the source -- check
 * the generated code when comparing performance with the unrolled version.
 */
for(i = 1; i < 8; i++)
{
if(Sum[i] < Sum[index])
index = i;
}
/*
 * Step 2: only the overall winner is compared against Best. On strict
 * improvement, Best, the 1-based iBest and *aBits are each updated once.
 */
if (Sum[index] < Best)
{
Best = Sum[index];
iBest = index + 1;
*aBits = Bits[index];
}
但是,在第二种情况下,延迟增加了大约 20%。有人能解释一下这种现象吗?在 OpenCL 中,使用 if 条件语句的写法是否比 for 循环更高效?
我使用的是英特尔 HD Graphics 530(Gen9)GPU,并且使用内存映射访问。