我正在做一项优化 C 代码的作业;教授暗示,代码移动(code motion)应该是解决这个特定问题的主要手段。
这是最初的未优化代码:
/*
 * Baseline smoothing routine.
 *
 * Visits every (row, col) position of a dim x dim image in row-major order
 * and stores the value returned by avg() for that position into dst.
 *
 *   dim - number of rows and of columns in the image
 *   src - input image, read via avg()
 *   dst - output image, one element written per position
 */
void naive_smooth(int dim, pixel *src, pixel *dst) {
    int row;
    int col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            /* RIDX turns the (row, col) pair into a flat array offset. */
            dst[RIDX(row, col, dim)] = avg(dim, row, col, src);
        }
    }
}
/* One image pixel: three unsigned short colour channels, in R, G, B order. */
typedef struct {
    unsigned short red, green, blue;
} pixel;
/* Flat row-major offset of element (row, col) when each row holds n elements. */
#define RIDX(row, col, n) ((row) * (n) + (col))
/*
 * Computes the smoothed value for position (i, j) of a dim x dim image.
 *
 * Every pixel in the 3x3 window centred on (i, j), clipped to the range
 * [0, dim-1] in both directions, is fed to accumulate_sum(); the totals are
 * then converted to a pixel by assign_sum_to_pixel().
 *
 *   dim  - number of rows and of columns in the image
 *   i, j - row and column of the pixel being smoothed
 *   src  - input image, indexed through RIDX
 *
 * Returns the resulting pixel by value.
 */
static pixel avg(int dim, int i, int j, pixel *src) {
    int ii, jj;
    pixel_sum sum;
    pixel current_pixel;

    initialize_pixel_sum(&sum);

    /* max()/min() clamp the window so border pixels use fewer neighbours. */
    for (ii = max(i-1, 0); ii <= min(i+1, dim-1); ii++) {
        for (jj = max(j-1, 0); jj <= min(j+1, dim-1); jj++) {
            accumulate_sum(&sum, src[RIDX(ii, jj, dim)]);
        }
    }

    /* Pass the address of the local result so the helper can fill it in. */
    assign_sum_to_pixel(&current_pixel, sum);
    return current_pixel;
}
/* Zeroes the red, green, blue and num fields of *sum. */
static void initialize_pixel_sum(pixel_sum *sum) {
    sum->num = 0;
    sum->blue = 0;
    sum->green = 0;
    sum->red = 0;
}
/*
 * Adds the three colour channels of p to the matching totals in *sum and
 * increases sum->num by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p) {
    sum->num += 1;
    sum->blue += (int) p.blue;
    sum->green += (int) p.green;
    sum->red += (int) p.red;
}
/*
 * Stores into *current_pixel the per-channel quotient of each total in sum
 * divided by sum.num, converted to unsigned short.
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum) {
    current_pixel->blue = (unsigned short) (sum.blue / sum.num);
    current_pixel->green = (unsigned short) (sum.green / sum.num);
    current_pixel->red = (unsigned short) (sum.red / sum.num);
}
这是我到目前为止的优化版本:
/*
 * Smoothing routine with the per-pixel helper logic written out inline.
 *
 * For every (row, col) of a dim x dim image, sums the channels of all pixels
 * in the 3x3 window around it (clipped to [0, dim-1]), divides each total by
 * the number of pixels visited, and stores the result in dst.
 *
 *   dim - number of rows and of columns in the image
 *   src - input image
 *   dst - output image
 */
void smooth(int dim, pixel *src, pixel *dst) {
    const int n = dim;
    int row, col;

    for (row = 0; row < n; row++) {
        /* Values that depend only on the row are computed once per row. */
        const int row_base = n * row;
        const int row_hi = min(row+1, n-1);
        const int row_lo = max(row-1, 0);

        for (col = 0; col < n; col++) {
            const int col_hi = min(col+1, n-1);
            const int col_lo = max(col-1, 0);
            pixel_sum acc;
            pixel result;
            int r, c;

            acc.red = acc.green = acc.blue = acc.num = 0;

            for (r = row_lo; r <= row_hi; r++) {
                /* Offset of the first element of window row r. */
                const int window_base = n * r;

                for (c = col_lo; c <= col_hi; c++) {
                    const pixel sample = src[window_base + c];

                    acc.red += (int) sample.red;
                    acc.green += (int) sample.green;
                    acc.blue += (int) sample.blue;
                    acc.num++;
                }
            }

            /* acc.num is the count of window pixels that were in bounds. */
            result.red = (unsigned short) (acc.red / acc.num);
            result.green = (unsigned short) (acc.green / acc.num);
            result.blue = (unsigned short) (acc.blue / acc.num);
            dst[row_base + col] = result;
        }
    }
}
到目前为止,我所做的改动都是我理解中属于代码移动(code motion)优化原则的内容:我把函数调用(avg、initialize_pixel_sum 等)内联到了调用处,为全局变量创建了局部副本(dim 到 localDim),并把循环条件中的函数调用移到了循环之外(min 和 max 函数,以及与 dim/localDim 相乘的运算)。然而,虽然作业附带的测试套件表明它与原始代码产生了相同的结果,但它显示 CPE 完全没有变化……也就是说,我重写了代码,却没有取得任何优化效果。
我真的很困惑为什么这没有任何效果;如果有人能指点一下,什么样的代码移动(code motion)才能让这组函数的 CPE 发生变化,我将非常感激。
谢谢!