我试图通过循环展开来优化这段代码,
void naive_flip(int dim, pixel *src, pixel *dst)
{
int i, j;
for (i = 0; i < dim; i++){
for (j = 0; j < dim; j++){
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
}
}
}
但是,我以前没有真正做过,所以当我尝试时,我得到了这个
void flip_one(int dim, pixel *src, pixel *dst)
{
//i will be attempting loop unrolling to optimize code
int i, j;
for (i=0; i<dim; i+=32)
{
for (int j=0; j<dim; j+=32)
{
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j+1, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j+2, dim)].blue = src[RIDX(i, j, dim)].blue;
}
for (int j=0; j<dim; j+=32)
{
dst[RIDX_F(i+1, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i+1, j+1, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i+1, j+2, dim)].blue = src[RIDX(i, j, dim)].blue;
}
for (int j=0; j<dim; j+=32)
{
dst[RIDX_F(i+2, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i+2, j+1, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i+2, j+2, dim)].blue = src[RIDX(i, j, dim)].blue;
}
for (int j=0; j<dim; j+=32)
{
dst[RIDX_F(i+3, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i+3, j+1, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i+3, j+2, dim)].blue = src[RIDX(i, j, dim)].blue;
}
}
}
运行代码时,它不起作用,它给我这个错误:
“错误:维度 = 96, 9216 错误
例如,以下两个像素应该具有相等的值:
src[9215].{红、绿、蓝} = {22543,1426,53562}
dst[9120].{红、绿、蓝} = {0,0,0}"
对我做错了什么或我应该做什么的任何帮助表示赞赏
编辑我用这个更新了我的代码
void flip_one(int dim, pixel *src, pixel *dst)
{
//i will be attempting loop unrolling to optimize code
int i, j;
for (i=0; i<dim; i++)
{
for (int j=0; j<dim; j++)
{
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
}
}
}
我不再收到错误(耶!)但这实际上并没有加快它,实际上它减慢了它。也许我做错了什么,但是,我不知道是什么。
编辑我更新了代码看起来像
void flip_one(int dim, pixel *src, pixel *dst)
{
//i will be attempting loop unrolling to optimize code
int i, j;
for (i=0; i<dim; i++)
{
for (int j=0; j<dim; j+=4)
{
dst[RIDX_F(i, j, dim)].red = src[RIDX(i, j, dim)].red;
dst[RIDX_F(i, j, dim)].green = src[RIDX(i, j, dim)].green;
dst[RIDX_F(i, j, dim)].blue = src[RIDX(i, j, dim)].blue;
dst[RIDX_F(i, j+1, dim)].red = src[RIDX(i, j+1, dim)].red;
dst[RIDX_F(i, j+1, dim)].green = src[RIDX(i, j+1, dim)].green;
dst[RIDX_F(i, j+1, dim)].blue = src[RIDX(i, j+1, dim)].blue;
dst[RIDX_F(i, j+2, dim)].red = src[RIDX(i, j+2, dim)].red;
dst[RIDX_F(i, j+2, dim)].green = src[RIDX(i, j+2, dim)].green;
dst[RIDX_F(i, j+2, dim)].blue = src[RIDX(i, j+2, dim)].blue;
dst[RIDX_F(i, j+3, dim)].red = src[RIDX(i, j+3, dim)].red;
dst[RIDX_F(i, j+3, dim)].green = src[RIDX(i, j+3, dim)].green;
dst[RIDX_F(i, j+3, dim)].blue = src[RIDX(i, j+3, dim)].blue;
}
}
}