2

请看下面的代码,它做了一个简单的字符赋值

// Kernel as posted in the question: scans the first 11 bytes of gpuHello for
// a space and records the address of the byte that follows it.
//
// gpuHello   - buffer to scan; indices 0 through 10 are read.
// finalPoint - meant to receive the result, but it is a plain char* taken by
//              value, so the assignment at the end only changes this kernel's
//              own copy and the caller never observes it.
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){

    char* temp;  // not initialized; only assigned if a space is found
    bool found = false;
    for(int i = 0 ; i < 11; i++){
        if(gpuHello[i] == ' '){
            temp = &gpuHello[i+1];  // address of the character after the space
            found = true;

            break;
        }
    }
    bool sth = found;  // never read afterwards
    finalPoint = temp; // rebinds the by-value parameter; nothing is written through it

}
// Host driver as posted in the question: uploads "Hello World", launches the
// kernel, then tries to copy five characters back and print them.
int main()
{
    // Testing one concept;
    string hello = "Hello World";
    char* gpuHello;
    // 11 bytes cover the visible characters only; the terminating '\0' is not uploaded.
    cudaMalloc((void**)&gpuHello, 11 * sizeof(char));
    cudaMemcpy(gpuHello, hello.c_str(), 11 * sizeof(char), cudaMemcpyHostToDevice);
    char* didItFind;  // never initialized on the host
    char* whatIsIt = (char*)malloc(5 * sizeof(char));  // five bytes, no room for a terminator
    // didItFind is passed by value, so the launch cannot change this host variable.
    seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
    // The source pointer below is still the uninitialized host value, and the
    // return status of this call (like every CUDA call here) is not checked.
    cudaMemcpy(whatIsIt,didItFind, 5 * sizeof(char), cudaMemcpyDeviceToHost);
    cout<<"The pointer points to : " << whatIsIt;  // streamed as a C string
    return 0;
}

我真的不明白,当我打印 whatIsIt 时,为什么它不打印“World”作为答案,而只是打印一些随机字符串。

编辑:正如有人指出的那样,下面是把字符串末尾的空字符也计算在内之后的更新版本

// Second version of the question's kernel: scans the first 11 bytes of
// gpuHello for a space.
//
// gpuHello   - buffer to scan; indices 0 through 10 are read.
// finalPoint - meant to receive the result, but it is still a plain char*
//              taken by value, so the final assignment only changes this
//              kernel's own copy and the caller never observes it.
__global__ void seehowpointerwork(char* gpuHello, char* finalPoint){

    char* temp;  // not initialized; only assigned if a space is found
    bool found = false;
    for(int i = 0 ; i < 11; i++){
        if(gpuHello[i] == ' '){
            temp = gpuHello;  // stores the start of the buffer, not the byte after the space
            found = true;

            break;
        }
    }
    bool sth = found;  // never read afterwards
    finalPoint = temp; // rebinds the by-value parameter; nothing is written through it

}
// Second version of the question's host driver: the buffer sizes now include
// the string terminator, but the result pointer is still passed by value.
int main()
{
    // Testing one concept;
    string hello = "Hello World";
    char* gpuHello;
    // 12 bytes: the 11 visible characters plus the '\0' supplied by c_str().
    cudaMalloc((void**)&gpuHello, 12 * sizeof(char));
    cudaMemcpy(gpuHello, hello.c_str(), 12 * sizeof(char), cudaMemcpyHostToDevice);
    char* didItFind;  // never initialized on the host
    char* whatIsIt = (char*)malloc(6 * sizeof(char));
    // didItFind is passed by value, so the launch cannot change this host variable.
    seehowpointerwork<<<1,1>>>(gpuHello, didItFind);
    // The source pointer below is still the uninitialized host value, and the
    // return status of this call (like every CUDA call here) is not checked.
    cudaMemcpy(whatIsIt,didItFind, 6 * sizeof(char), cudaMemcpyDeviceToHost);
    cout<<"The pointer points to : " << whatIsIt;  // streamed as a C string
    return 0;
}
4

1 回答 1

3

如果你想让内核按照你设想的方式运行,就必须通过引用(而不是按值)传递 finalPoint。也许可以像这样:

#include <cstdio>
#include <iostream>
#include <string>

using namespace std;

// Finds the first space within the first 11 bytes of gpuHello and stores the
// address of the byte that follows it through finalPoint.
//
// gpuHello   - buffer to scan; indices 0 through 10 are read.
// finalPoint - location that receives the resulting pointer. Taking a char**
//              is what lets the kernel hand the pointer back to its caller.
//
// If no space is present, NULL is stored instead of an indeterminate value,
// so the caller can detect the "not found" case. If the space is the last
// scanned byte, the stored pointer is one past the scanned range and must not
// be dereferenced.
__global__ void seehowpointerwork(char * gpuHello, char ** finalPoint){

    char* temp = NULL;  // stays NULL when the scan finds no space
    for(int i = 0 ; i < 11; i++){
        if(gpuHello[i] == ' '){
            temp = &gpuHello[i+1];
            break;
        }
    }
    *finalPoint = temp;
}

// Reports a failed CUDA runtime call and optionally terminates the program.
//
// code  - status returned by the CUDA call being checked.
// file  - source file name used in the message; const so that __FILE__ (a
//         string literal) can be passed without a deprecated conversion.
// line  - source line number used in the message.
// abort - when true (the default), the process exits with `code` as its status.
//
// Nothing is printed and nothing happens when code == cudaSuccess.
inline void gpuAssert(cudaError_t code, const char *file, int line,
                 bool abort=true)
{
   if (code != cudaSuccess) {
      // Diagnostics go to stderr so they are not mixed into program output.
      fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
      if (abort) exit(code);
   }
}
// Wraps a CUDA runtime call so that its return status is checked at the call
// site. The do/while(0) form makes the macro expand to a single statement, so
// it can be used safely in an unbraced if/else.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

// Uploads "Hello World", asks the kernel for the address that follows the
// first space, then copies the five characters at that address back to the
// host and prints them.
int main()
{
    string hello = "Hello World";
    char* gpuHello;
    gpuErrchk( cudaMalloc((void**)&gpuHello, 11 * sizeof(char)) );
    gpuErrchk( cudaMemcpy(gpuHello, hello.data(), 11 * sizeof(char), cudaMemcpyHostToDevice) );

    // didItFinda is the device-side slot the kernel writes its result into;
    // didItFindb is the host copy of the pointer stored in that slot.
    char ** didItFinda, * didItFindb;
    gpuErrchk( cudaMalloc((void **)&didItFinda, sizeof(char *)) );

    // One byte larger than the five characters copied back and zero-filled,
    // so the buffer is a terminated C string when it is streamed to cout.
    char whatIsIt[6] = {0};

    seehowpointerwork<<<1,1>>>(gpuHello, didItFinda);
    gpuErrchk( cudaPeekAtLastError() );  // reports launch failures
    gpuErrchk( cudaMemcpy(&didItFindb, didItFinda, sizeof(char *), cudaMemcpyDeviceToHost) );
    gpuErrchk( cudaMemcpy(whatIsIt, didItFindb, 5 * sizeof(char), cudaMemcpyDeviceToHost) );
    cout<<"The pointer points to : " << whatIsIt << endl;

    // Release the device allocations made above.
    gpuErrchk( cudaFree(didItFinda) );
    gpuErrchk( cudaFree(gpuHello) );
    return 0;
}

编译并运行时,此版本产生:

$ nvcc -arch=sm_12 -Xptxas="-v" programmer.cu 
ptxas info    : Compiling entry function '_Z17seehowpointerworkPcPS_' for 'sm_12'
ptxas info    : Used 4 registers, 8+16 bytes smem, 8 bytes cmem[1]

$ ./a.out 
The pointer points to : World

按照你目前的写法,设备到主机的复制会失败,因为 didItFind 不是一个有效的设备指针——你把它按值传递给了内核,所以内核无法修改它在主机上的值。上面的代码包含了足够的错误检查来发现此类问题——你应该始终检查每个 API 调用的返回状态。

于 2012-07-14T21:14:55.143 回答