我正在尝试优化应用程序中一段关键代码的性能。这段用 C 语言编写的代码遍历 sourceImage 的所有像素,计算每个像素与其各个邻居之间的“颜色距离”,并决定是否记录由 colorDistance 派生出的值,然后再处理下一个邻居。
在 Xcode 中用 Instruments 对应用程序进行性能分析后发现,约 70% 的时间花在一行看似简单的浮点计算上——耗时是包含三个 powf 和一个 sqrtf 的那行代码的近七倍(colorDistance 的计算只占 10.8%)。
在下面部分代码行的左侧,您会看到从 Xcode Instruments 复制过来的耗时百分比。(我还注意到,其他一些很普通的代码行所占的百分比也出人意料地高,尽管远不及我上面提到的那一行。)
任何关于在何处以及如何优化的提示将不胜感激。
谢谢!
// Visits every interior pixel (rows 1..height-2, columns 1..width-2) and, for
// each of its 8 neighbours, computes a colour distance between the pixel and
// the neighbour, derives an "attack force" from it, and — when the attack
// succeeds — overwrites the pixel's strength and output label with values
// taken from the neighbour.
//
// The leading "N.N%" tokens on some lines are profiler sample shares pasted
// from Instruments; they are annotations, not part of the program.
for (int row = 1; row < height - 1; row++)
{
for (int col = 1; col < width - 1; col++)
{
// Flat (row-major) index used for the one-entry-per-pixel maps.
int pixelIndex = (col + row * width);
// Offset of the pixel's first component inside sourceImage.
1.7% int pixelIndexIntoImage = pixelIndex * COMPONENTS_PER_PIXEL;
// Loop over the pixel's 8 neighbours, starting from neighbour id 0,
// using Nx[] and Ny[] as per-neighbour column/row offsets.
1.6% for (int n = 0; n < 8; n++)
{
// Same flat indexing as pixelIndex, shifted by (Nx[n], Ny[n]).
5.3% int neighborIndex = pixelIndex + Nx[n] + width * Ny[n];
int neighborIndexIntoImage = neighborIndex * COMPONENTS_PER_PIXEL;
// Skip neighbours whose label equals LABEL_UNKNOWN_VALUE
// (i.e. neither foreground nor background).
3.3% uint8_t labelValue = labelsMap[neighborIndex];
1.1% if (labelValue == LABEL_UNKNOWN_VALUE)
continue;
// "Colour distance" between the pixel and the current neighbour.
// Assigned on both branches below before it is read.
float colorDistance;
1.4% if(numColorComponents == 3)
{
// Three-component case: Euclidean distance over the first three
// components. The pixel's own components do not depend on n, yet
// they are re-read here for every neighbour.
5.3% uint8_t redPixel = sourceImage[pixelIndexIntoImage ];
uint8_t grnPixel = sourceImage[pixelIndexIntoImage+1];
uint8_t bluPixel = sourceImage[pixelIndexIntoImage+2];
uint8_t redNeigh = sourceImage[neighborIndexIntoImage ];
uint8_t grnNeigh = sourceImage[neighborIndexIntoImage+1];
uint8_t bluNeigh = sourceImage[neighborIndexIntoImage+2];
// The uint8_t operands promote to int, so each difference is a
// signed int that is then converted to float for powf(x, 2).
10.8% colorDistance = sqrtf( powf(redPixel-redNeigh, 2) +
powf(grnPixel-grnNeigh, 2) +
powf(bluPixel-bluNeigh, 2));
}
else
{
// Otherwise only the first component is compared (absolute difference).
uint8_t pixel = sourceImage[pixelIndexIntoImage ];
uint8_t neigh = sourceImage[neighborIndexIntoImage];
colorDistance = fabsf(pixel - neigh);
}
// Map the distance to an attack force: 1 at distance 0, decreasing
// linearly with colorDistance. `1.0` is a double literal, so the
// subtraction is carried out in double and narrowed back to float.
// NOTE(review): the large share sampled on this line may partly be time
// spent waiting on the loads/sqrtf issued just above rather than the
// arithmetic itself — confirm in the disassembly view.
71.2% float attackForce = 1.0 - (colorDistance / MAX_COLOR_DISTANCE);
// NOTE(review): the test reads strengthMap[neighbor] and
// revisedStrengthMap[pixel], while the update below writes
// strengthMap[pixel] from revisedStrengthMap[neighbor]; the roles of the
// two maps look swapped between the test and the update — confirm which
// one is intended.
if (attackForce * strengthMap[neighborIndex] > revisedStrengthMap[pixelIndex])
{
// Attack succeeds: the pixel takes a strength scaled by attackForce
// and copies the neighbour's label.
strengthMap[pixelIndex] = attackForce * revisedStrengthMap[neighborIndex];
outputMask[pixelIndex] = labelsMap[neighborIndex];
isConverged = false; // keep iterating
}
}
}
}
变量的定义
// Buffers and lookup tables used by the neighbour loop. width and height are
// defined outside this excerpt.
uint8_t *sourceImage; // 4 bytes per pixel
uint8_t *labelsMap, *outputMask; // 1 byte per pixel
int numPixels = width * height;
// Two float maps with one entry per pixel.
// NOTE(review): no NULL check on the malloc results is visible in this
// excerpt — confirm that one exists before the buffers are used.
float *strengthMap = (float*) malloc(sizeof(float)*numPixels);
float *revisedStrengthMap = (float*) malloc(sizeof(float)*numPixels);
// Column (Nx) and row (Ny) offsets of the 8 neighbours, indexed by neighbour
// id 0..7, starting at (-1, -1).
short Nx[] = {-1, 0, 1, 1, 1, 0, -1, -1};
short Ny[] = {-1, -1, -1, 0, 1, 1, 1, 0};
根据我收到的建议(乘法比除法“便宜”),我修改了一行代码。有趣的是,该行的占比从 71.2% 下降到了 1.7%,但紧随其后的 “if” 语句却飙升到 64.8%——我实在搞不明白!
// Revised form of the two lines: the division is replaced by a multiplication
// with MAX_COLOR_DISTANCE_INV (presumably 1 / MAX_COLOR_DISTANCE; it is defined
// outside this excerpt — confirm). The leading percentages are profiler sample
// shares, not code. `1.0` is still a double literal, so the subtraction is
// carried out in double and narrowed back to float.
1.7% float attackForce = 1.0 - (colorDistance * MAX_COLOR_DISTANCE_INV);
// Same comparison as before; only the sampled share attributed to it changed.
64.8% if (attackForce * strengthMap[neighborIndex] > revisedStrengthMap[pixelIndex])