1

我正在完成一项优化 C 代码的作业;教授提示,代码移动(code motion)应该是解决这个问题的主要手段。

这是最初的未优化代码:

/*
 * naive_smooth - baseline (unoptimized) smoothing pass.
 *
 * For every (row, col) with 0 <= row, col < dim, stores the value returned
 * by avg(dim, row, col, src) into dst at index RIDX(row, col, dim).
 *
 * dim: loop bound for both indices; also forwarded to avg() and RIDX().
 * src: input pixels, only passed through to avg().
 * dst: output pixels, written once per (row, col) pair.
 */
void naive_smooth(int dim, pixel *src, pixel *dst) {
    int row, col;

    for (row = 0; row < dim; row++) {
        for (col = 0; col < dim; col++) {
            pixel smoothed = avg(dim, row, col, src);
            dst[RIDX(row, col, dim)] = smoothed;
        }
    }
}

/* One pixel: three colour channels, each stored as an unsigned short. */
typedef struct {
    unsigned short red;    /* red channel value */
    unsigned short green;  /* green channel value */
    unsigned short blue;   /* blue channel value */
} pixel;

/*
 * RIDX(i, j, n) expands to the flat index i*n + j.
 * Every argument and the whole expansion are parenthesized, and each
 * argument is expanded exactly once.
 */
#define RIDX(i,j,n) ((i)*(n)+(j))

/*
 * avg - average of the pixels surrounding position (i, j).
 *
 * Visits every (row, col) with
 *     max(i-1, 0) <= row <= min(i+1, dim-1)
 *     max(j-1, 0) <= col <= min(j+1, dim-1)
 * feeds src[RIDX(row, col, dim)] to accumulate_sum(), and then converts the
 * running totals into a pixel with assign_sum_to_pixel().
 *
 * NOTE(review): min/max and pixel_sum are declared outside this snippet;
 * the usual two-argument minimum/maximum semantics are assumed.
 *
 * Returns the resulting pixel by value.
 */
static pixel avg(int dim, int i, int j, pixel *src) {
    int row, col;
    pixel_sum total;
    pixel result;

    initialize_pixel_sum(&total);

    for (row = max(i-1, 0); row <= min(i+1, dim-1); row++) {
        for (col = max(j-1, 0); col <= min(j+1, dim-1); col++) {
            accumulate_sum(&total, src[RIDX(row, col, dim)]);
        }
    }

    assign_sum_to_pixel(&result, total);
    return result;
}

/*
 * initialize_pixel_sum - reset an accumulator before use.
 *
 * Sets the red, green and blue totals and the num counter of *sum to 0.
 */
static void initialize_pixel_sum(pixel_sum *sum) {
    sum->num = 0;
    sum->blue = 0;
    sum->green = 0;
    sum->red = 0;
}

/*
 * accumulate_sum - fold one pixel into a running total.
 *
 * Adds each channel of p (converted to int) to the matching total in *sum
 * and increments sum->num by one.
 */
static void accumulate_sum(pixel_sum *sum, pixel p) {
    sum->num += 1;
    sum->red += (int) p.red;
    sum->green += (int) p.green;
    sum->blue += (int) p.blue;
}

/*
 * assign_sum_to_pixel - turn accumulated totals into an averaged pixel.
 *
 * Each channel of *current_pixel receives the matching total divided by
 * sum.num, converted to unsigned short.
 *
 * sum.num must be non-zero; the caller is responsible for having
 * accumulated at least one pixel before calling this.
 */
static void assign_sum_to_pixel(pixel *current_pixel, pixel_sum sum) {
    current_pixel->red   = (unsigned short) (sum.red / sum.num);
    current_pixel->green = (unsigned short) (sum.green / sum.num);
    current_pixel->blue  = (unsigned short) (sum.blue / sum.num);
}

这是我到目前为止的优化版本:

/*
 * smooth - hand-inlined variant of naive_smooth.
 *
 * For every (i, j) with 0 <= i, j < dim, sums the channels of all src
 * pixels whose row lies in [max(i-1, 0), min(i+1, dim-1)] and whose column
 * lies in [max(j-1, 0), min(j+1, dim-1)], divides each channel total by the
 * number of pixels visited, and stores the result at dst[dim*i + j].
 *
 * The accumulation and averaging are written out inline instead of calling
 * helper functions, and the row-dependent bounds and row offsets are
 * computed once per row rather than once per pixel.
 *
 * NOTE(review): min/max and pixel_sum are declared outside this snippet;
 * the usual two-argument minimum/maximum semantics are assumed.
 */
void smooth(int dim, pixel *src, pixel *dst) {
    int localDim = dim;
    int i, j;

    for (i = 0; i < localDim; i++) {
        /* Values that depend only on the output row. */
        int row_base = localDim * i;
        int row_hi = min(i+1, localDim-1);
        int row_lo = max(i-1, 0);

        for (j = 0; j < localDim; j++) {
            int col_hi = min(j+1, localDim-1);
            int col_lo = max(j-1, 0);
            int ii, jj;
            pixel_sum sum;
            pixel current_pixel;

            /* Fresh accumulator for this output pixel. */
            sum.num = 0;
            sum.blue = 0;
            sum.green = 0;
            sum.red = 0;

            for (ii = row_lo; ii <= row_hi; ii++) {
                int src_base = localDim * ii;

                for (jj = col_lo; jj <= col_hi; jj++) {
                    pixel p = src[src_base + jj];

                    sum.red += (int) p.red;
                    sum.green += (int) p.green;
                    sum.blue += (int) p.blue;
                    sum.num++;
                }
            }

            /* The window always contains at least (i, j), so sum.num >= 1. */
            current_pixel.red = (unsigned short) (sum.red / sum.num);
            current_pixel.green = (unsigned short) (sum.green / sum.num);
            current_pixel.blue = (unsigned short) (sum.blue / sum.num);

            dst[row_base + j] = current_pixel;
        }
    }
}

到目前为止,我所做的更改都是我认为符合代码移动(code motion)优化原则的:我把函数调用(avg、initialize_pixel_sum 等)内联到了函数体中,为全局变量创建了局部副本(dim → localDim),并把循环条件中的函数调用和运算移到了循环之外(min 和 max 函数,以及与 dim/localDim 相关的乘法)。然而,虽然作业附带的测试套件表明它与原始代码产生了相同的结果,但它显示 CPE(cycles per element,每元素周期数)完全没有变化……也就是说,我重写了代码,却没有带来任何优化效果。

我很困惑为什么这些改动没有任何效果;如果有人能指出什么样的代码移动才能改变这组函数的 CPE,我将非常感激。

谢谢!

4

0 回答 0