0

我有计算临时缓冲区中数组元素的部分总和的通用内核。

#if FUNC_SUM
#define FUNC(a, b) b += a;
#elif FUNC_ABS_SUM
#define FUNC(a, b) b += a >= (dstT)(0) ? a : -a;
#elif FUNC_SQR_SUM
#define FUNC(a, b) b += a * a;
#else
#error No sum function
#endif

__kernel void sum(int cols,int invalid_cols,int offset,int elemnum,int groupnum,
                                __global srcT *src, __global dstT *dst)
{
    int lid = get_local_id(0);
    int gid = get_group_id(0);
    int id = get_global_id(0);
    int idx = offset + id + (id / cols) * invalid_cols;

    __local dstT localmem_sum[128];
    dstT sum = (dstT)(0), temp;

    for (int grainSize = groupnum << 8; id < elemnum; id += grainSize)
    {
        idx = offset + id + (id / cols) * invalid_cols;
        temp = convertToDstT(src[idx]);
        FUNC(temp, sum);
    }

    if (lid > 127) 
        localmem_sum[lid - 128] = sum; // ?? 
    barrier(CLK_LOCAL_MEM_FENCE);

    if (lid < 128)
        localmem_sum[lid] = sum + localmem_sum[lid];
    barrier(CLK_LOCAL_MEM_FENCE);

    for (int lsize = 64; lsize > 0; lsize >>= 1)
    {
        if (lid < lsize)
        {
            int lid2 = lsize + lid;
            localmem_sum[lid] = localmem_sum[lid] + localmem_sum[lid2];
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    if (lid == 0)
        dst[gid] = localmem_sum[0];
}

并且此代码失败并显示消息“已执行不可访问!” 在标记为 // ?? 的行上 这段代码有什么问题吗?是否存在一些解决方法来避免此错误?

目标平台:AMD GPU

4

0 回答 0