1

I want to write the following CUDA function:

// Sketch of the desired function (not compilable as written): the condition
// marked `/* MAGIC 1 */` is a placeholder for a yet-unknown test that tells
// whether `a` points into shared memory.
//   a - pointer to the input integers, in an unknown memory space
//   n - size argument; not used anywhere in this sketch
void foo(int* a, size_t n)
{
     if ( /* MAGIC 1 */ ) {
         // a is known to be in shared memory, 
         // so use it directly
     }
     else {
         // make a copy of a in shared memory
         // and use the copy
     }
 }

On the host side, we have a slightly-related facility in the form of cudaPointerGetAttributes, which can tell us whether or not a pointer is to device memory or host memory; perhaps there's some way to distinguish pointers in device code as well, and perhaps it can also discern shared from global pointers. Alternatively, and perhaps even better - maybe there's a compile-time mechanism to do that, since, after all, the device functions are only compiled into kernels and are not freestanding, so nvcc can often know whether they're used with shared memory or not.

4

2 回答 2

5

您可以通过一些内联“汇编”来使用 isspacep PTX 指令:

// PTR_CONSTRAINT is the inline-PTX operand constraint used for pointer
// arguments. It must match the pointer width of the build:
//   "r" - 32-bit register, for 32-bit builds
//   "l" - 64-bit register, for 64-bit builds
// Code that is only ever compiled in 64-bit mode can simply use "l".

#if !defined(_WIN64) && !defined(__LP64__)
# define PTR_CONSTRAINT "r"
#else
# define PTR_CONSTRAINT "l"
#endif

/**
 * Reports whether a generic pointer refers to shared memory.
 *
 * The PTX instruction `isspacep.shared` sets a predicate for the given
 * address, and `selp.b32` converts that predicate into the integer 1 or 0.
 * The pointer is only inspected, never dereferenced.
 *
 * The parameter is `const void *` so that pointers to const-qualified data
 * can be passed without a cast; pointers to non-const data still convert
 * implicitly.
 *
 * Relies on PTR_CONSTRAINT expanding to the inline-PTX register constraint
 * that matches the pointer width ("l" for 64-bit, "r" for 32-bit).
 *
 * NOTE(review): newer CUDA toolkits document built-in address-space
 * predicates (e.g. __isShared()); confirm availability for the targeted
 * toolkit before replacing this inline PTX with one.
 *
 * @param ptr  Generic address to classify.
 * @return     1 if the `isspacep.shared` predicate is set, 0 otherwise.
 */
__device__ int isShared(const void *ptr)
{
    int res;
    asm("{"
        ".reg .pred p;\n\t"                // scratch predicate, scoped to this { } block
        "isspacep.shared p, %1;\n\t"       // p = (ptr lies in the shared state space)
        "selp.b32 %0, 1, 0, p;\n\t"        // res = p ? 1 : 0
        "}" :
        "=r"(res): PTR_CONSTRAINT(ptr));
    return res;
}

所以你的例子变成

// Device-side skeleton: chooses between working on `a` in place and working
// on a shared-memory copy, based on what isShared() reports for `a`.
__device__ void foo(int* a, size_t n)
{
    const bool needs_copy = !isShared(a);
    if (needs_copy) {
        // a is not in shared memory:
        // make a copy of a in shared memory
        // and use the copy
    } else {
        // a is known to be in shared memory,
        // so use it directly
    }
}
于 2017-03-01T08:21:54.493 回答
0

这是对 @tera 答案的推广。

请使用以下代码中的 is_in_shared_memory();这段代码为所有可能的设备内存空间都定义了类似的函数:

// Turns a macro argument into a string literal. Guarded so that an existing
// definition of STRINGIFY is reused rather than redefined.
#ifndef STRINGIFY
#define STRINGIFY(_q) #_q
#endif

// Inline-PTX register constraint for a pointer operand: "l" (64-bit register)
// on 64-bit builds, "r" (32-bit register) otherwise. Selecting it here means
// the generated functions need no manual edit for 32-bit builds.
#if defined(_WIN64) || defined(__LP64__)
#define IS_IN_MEMORY_SPACE_PTR_CONSTRAINT "l"
#else
#define IS_IN_MEMORY_SPACE_PTR_CONSTRAINT "r"
#endif

// Generates a device function
//     int is_in_<space>_memory(const void *ptr)
// that returns 1 when the PTX instruction `isspacep.<space>` sets its
// predicate for `ptr`, and 0 otherwise. The pointer is only inspected,
// never dereferenced.
#define IS_IN_MEMORY_SPACE(_which_space) \
__forceinline__ __device__ int is_in_ ## _which_space ## _memory (const void *ptr) \
{ \
    int result; \
    asm ("{" \
        ".reg .pred p;\n\t" \
        "isspacep." STRINGIFY(_which_space) " p, %1;\n\t" \
        "selp.b32 %0, 1, 0, p;\n\t" \
        "}" \
        : "=r"(result) : IS_IN_MEMORY_SPACE_PTR_CONSTRAINT(ptr)); \
    return result; \
}

// One predicate function per state space: is_in_const_memory,
// is_in_global_memory, is_in_local_memory, is_in_shared_memory.
IS_IN_MEMORY_SPACE(const)
IS_IN_MEMORY_SPACE(global)
IS_IN_MEMORY_SPACE(local)
IS_IN_MEMORY_SPACE(shared)

// The generator macros are implementation details; do not leak them.
#undef IS_IN_MEMORY_SPACE
#undef IS_IN_MEMORY_SPACE_PTR_CONSTRAINT

如果您正在构建 32 位代码,请将"l"约束(64 位地址)替换为"r".

于 2017-03-01T09:38:22.543 回答