1

问题已解决(如果您有兴趣,可以查看分隔线下方的第二部分)。现在我有一个新问题:为什么 `#define BLOCK_DIM 16;` 会导致下面的函数出错?直接使用数字 16 就没有问题。

这是错误

     expected a "]"
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
                           ^

     line 110: error:
              expected a ")"
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
                                        ^

     line 110: error: operand
              of "*" must be a pointer
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;

error:
          expected a ";"
          int Idout = get_local_id(0)*(BLOCK_DIM+1)+get_local_id(1);
                                                  ^

以及函数:

    // Tiled matrix transpose: every work-item copies one element of datain
    // into a tile in local memory, then writes one element of the transposed
    // tile to dataout.
    //
    //   dataout : destination buffer, written at row * height + col
    //   datain  : source buffer, read at row * width + col
    //   width   : N (signal length)
    //   height  : batch_size (number of signals in a batch)
    //
    // NOTE(review): the tile indexing assumes a 32 x 32 work-group - confirm
    // against the host-side enqueue.
    __kernel void transpose(
        __global float2* dataout,
        __global float2* datain,
        int width, int height)
    {
        // Tile staged in local memory: 32 rows, each 32 + 1 elements apart.
        // NOTE(review): the extra element per row presumably avoids
        // local-memory bank conflicts - confirm on the target device.
        __local float2 block[32 * (32 + 1)];

        // Phase 1: load this work-item's element into the tile (row-major).
        unsigned int col = get_global_id(0);
        unsigned int row = get_global_id(1);

        if ((col < width) && (row < height))
        {
            int tile_in = get_local_id(1) * (32 + 1) + get_local_id(0);
            unsigned int src = row * width + col;
            block[tile_in] = datain[src];
        }

        // Kept outside the bounds check so every work-item of the group
        // reaches the barrier before any tile element is read back.
        barrier(CLK_LOCAL_MEM_FENCE);

        // Phase 2: the group ids are swapped to address the transposed tile
        // in the output; the local ids are swapped when reading the tile.
        col = get_group_id(1) * 32 + get_local_id(0);
        row = get_group_id(0) * 32 + get_local_id(1);

        if ((col < height) && (row < width))
        {
            int tile_out = get_local_id(0) * (32 + 1) + get_local_id(1);
            unsigned int dst = row * height + col;
            dataout[dst] = block[tile_out];
        }
    }

================================

我正在努力提高图像上 2D FFT 的性能。经过基准测试,我发现转置函数是拖慢程序的原因,所以我用一个更优化的版本替换了它。

但在那之后,所有之前工作正常的函数都返回了错误码 CL_INVALID_KERNEL_NAME。除了转置函数和宿主代码中的 clSetKernelArg 调用之外,我没有改动其他任何东西,所以我毫无头绪。希望大家帮帮我 :)

更新:下面是错误信息。请不要在意行号 :) 这些行在我看来都很正常。到底有什么问题?

错误:

     expected a "]"
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
                           ^

     line 110: error:
              expected a ")"
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
                                        ^

     line 110: error: operand
              of "*" must be a pointer
      __local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;

error:
          expected a ";"
          int Idout = get_local_id(0)*(BLOCK_DIM+1)+get_local_id(1);
                                                  ^

这是内核文件

新的内核:

// Tile edge length used by the transpose kernel below. A #define takes no
// trailing semicolon: the preprocessor would paste it into every expansion
// of BLOCK_DIM.
#define BLOCK_DIM 16

// Tiled matrix transpose: every work-item copies one element of datain into
// a tile in local memory, then writes one element of the transposed tile to
// dataout.
//   dataout - destination buffer, written at yIndex * height + xIndex
//   datain  - source buffer, read at yIndex * width + xIndex
// NOTE(review): the tile indexing assumes a BLOCK_DIM x BLOCK_DIM
// work-group - confirm against the host-side enqueue.
__kernel void   transpose(
             __global float2* dataout, 
             __global float2* datain, 
             int width, int height)

// width = N (signal length)
// height = batch_size (number of signals in a batch)

{
// read the matrix tile into local memory

// Tile storage: BLOCK_DIM rows, each BLOCK_DIM + 1 elements apart.
// NOTE(review): the extra element per row presumably avoids local-memory
// bank conflicts - confirm on the target device.
__local float2 block[BLOCK_DIM * (BLOCK_DIM + 1)] ;
   unsigned int xIndex = get_global_id(0);
   unsigned int yIndex = get_global_id(1);

    // Work-items outside the matrix skip the load.
    if((xIndex < width) && (yIndex < height))
    {
            unsigned int index_in = yIndex * width + xIndex;
                       // Row-major position inside the tile.
                       int Idin = get_local_id(1)*(BLOCK_DIM+1)+get_local_id(0);
                       block[Idin]=  datain[index_in];
    }

// Placed outside the bounds check so every work-item of the group reaches it
// before any tile element is read back.
barrier(CLK_LOCAL_MEM_FENCE);

// write the transposed matrix tile to global memory

             // Group ids are swapped to address the transposed tile in the output.
             xIndex = get_group_id(1) * BLOCK_DIM + get_local_id(0);
             yIndex = get_group_id(0) * BLOCK_DIM + get_local_id(1);

    // Bounds are swapped as well: the output has `height` columns and `width` rows.
    if((xIndex < height) && (yIndex < width))
    {
        unsigned int index_out = yIndex * height + xIndex;
        // Local ids are swapped relative to the load, so the tile is read transposed.
        int Idout = get_local_id(0)*(BLOCK_DIM+1)+get_local_id(1);
                dataout[index_out] = block[Idout];
    }

}
4

1 回答 1

0

问题出在您的 #define 上:#define 不需要分号。基本上,`#define X Y` 会在编译之前把代码中所有出现的“X”替换为“Y”;如果在末尾加上分号,分号就会成为“Y”的一部分,从而产生大量语法错误。#define 不是语句。

当然,这只是一个简化的解释,但对于这个问题来说已经足够(如果您想了解更多信息,建议查阅预处理器的教程和文档)。

于 2012-12-03T13:38:04.393 回答