1

我使用的平台是 AMD Radeon 200 系列(蓝宝石 250,GPU 为 Oland)。我在主机端创建了一个以指针作为数据成员的类,并且在内核端也定义了相同的类。现在,如果内核端的类中包含该指针,构建时就会出现错误(-11)。如果在内核端删除该指针,代码就可以构建。在这种情况下,我把指针值复制到一个全局指针,但解引用得到的值为零。

我的主要目标是访问设备端类中的指针。

使用的环境:AMD SDK 3.0,OpenCL C++ 1.2 版。

任何帮助,将不胜感激。

  // Host-side record with a single data member: a raw, non-owning pointer to
  // an int. main() points it at a local variable and copies the object's bytes
  // into an OpenCL buffer.
  struct A
  {
      int* ptr;
  };

int main()
{
 const int LIST_SIZE = 1;

 int abc=20;
 A *obj=new A;
 obj->ptr= &abc;
printf("\nx=%d\n",*(obj->ptr));

 int *A = new int[LIST_SIZE]; 
int *B = new int[LIST_SIZE];
cl_int z;
for(int i = 0; i < LIST_SIZE; i++) {
    A[i] = i;
    B[i] = LIST_SIZE - i;
}


    // Get available platforms
    vector<Platform> platforms;
    Platform::get(&platforms);

    // Select the default platform and create a context using this platform and the GPU
    cl_context_properties cps[3] = { 
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)(platforms[0])(), 
        0 
    };
    Context context( CL_DEVICE_TYPE_GPU, cps,NULL,NULL,&z);

    // Get a list of devices on this platform
    vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();

    // Create a command queue and use the first device
    CommandQueue queue = CommandQueue(context, devices[0],NULL,&z);

     //Read source file
    std::ifstream sourceFile("kernel.cl");
    std::string sourceCode(
        std::istreambuf_iterator<char>(sourceFile),
        (std::istreambuf_iterator<char>()));
    Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()+1));

  // Make program of the source code in the context
    Program program = Program(context, source,&z);

  // Build program for these specific devices
    z=program.build(devices,"-x clc++",NULL,NULL);
    if(z!=CL_SUCCESS){
            cout<<"build"<<endl;return 1;}
   // Make kernel
     Kernel kernel(program, "vector_add",&z);

    // Create memory buffers
    Buffer bufferA = Buffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int),NULL,&z);
    Buffer bufferB = Buffer(context, CL_MEM_READ_ONLY, LIST_SIZE * sizeof(int),NULL,&z);
    Buffer bufferC = Buffer(context, CL_MEM_WRITE_ONLY, LIST_SIZE * sizeof(int),NULL,&z);
    Buffer bufferD = Buffer(context, CL_MEM_READ_WRITE, sizeof(obj),NULL,&z);


    // Copy lists A and B to the memory buffers
    z= queue.enqueueWriteBuffer(bufferA, CL_TRUE, 0, LIST_SIZE * sizeof(int), A,NULL,NULL);
    if(z!=CL_SUCCESS){
            cout<<"enqueue buff A"<<endl;return 1;}

    z=queue.enqueueWriteBuffer(bufferB, CL_TRUE, 0, LIST_SIZE * sizeof(int), B,NULL,NULL);
    if(z!=CL_SUCCESS){
            cout<<"enqueue buffB"<<endl;return 1;}

    z=queue.enqueueWriteBuffer(bufferD, CL_TRUE, 0, sizeof(obj), obj,NULL,NULL);
    if(z!=CL_SUCCESS){
            cout<<"enqueue buffB"<<endl;return 1;}

    // Set arguments to kernel
    z= kernel.setArg(0, bufferA);
     if(z!=CL_SUCCESS){
            cout<<"kerarg A"<<endl;return 1;}
    z= kernel.setArg(1, bufferB);
     if(z!=CL_SUCCESS){
            cout<<"kerarg buff B"<<endl;return 1;}
    z= kernel.setArg(2, bufferC);
     if(z!=CL_SUCCESS){
         cout<<"kerarg C"<<endl;return 1;}

        z= kernel.setArg(3, bufferD);
     if(z!=CL_SUCCESS){
         cout<<"kerarg C"<<endl;return 1;}




    // Run the kernel on specific ND range
    NDRange global(LIST_SIZE);
    NDRange local(1);
    queue.enqueueNDRangeKernel(kernel, NullRange, global, local,NULL,NULL);

    // Read buffer C into a local list
    int *C = new int[LIST_SIZE];
    queue.enqueueReadBuffer(bufferC, CL_TRUE, 0, LIST_SIZE * sizeof(int), C,NULL,NULL);
     queue.enqueueReadBuffer(bufferD, CL_TRUE, 0, sizeof(obj), obj,NULL,NULL);

    for(int i = 0; i < LIST_SIZE; i ++)
         std::cout << A[i] << " + " << B[i] << " = " << C[i] << std::endl; 


    printf("\nx=%d\n",*(obj->ptr));

return 0;
}

内核代码如下:

 // Kernel-side declaration of the type that the `obj` argument of vector_add
 // points to. It has one public data member.
 class A
{
 public:
    // Declaring the member as a pointer (the commented-out line below) made
    // the program fail to build.
    // NOTE(review): presumably an address-space problem (an unqualified
    // pointer member later cast to __global) - confirm against the build log.
    //int* ptr;  //generates error in building
    // Plain int used instead of the pointer.
    // NOTE(review): the host object stores an int* here, so the member sizes
    // may differ between host and device - TODO confirm.
    int ptr;
};

/*
 * Element-wise addition: c[id] = d[id] + b[id] for the work-item's global id,
 * followed by a debug print of the value stored in obj->ptr.
 *
 * d, b  input lists in global memory
 * c     output list in global memory
 * obj   object in global memory whose bytes were written by the host
 */
__kernel void vector_add(__global int *d,__global int *b,__global int  *c,__global class A *obj)
{
    const size_t id = get_global_id(0);

    c[id] = d[id] + b[id];

    // obj->ptr is printed as the plain int it is declared as. It is
    // deliberately not cast to a __global pointer and dereferenced: the value
    // originates from host memory, and a host address is not a valid device
    // address in OpenCL 1.2, so reading through it is an invalid access.
    // To read the pointee on the device, pass it in its own buffer argument.
    printf("kernel obj->ptr= %d \n", obj->ptr);
}
4

0 回答 0