0

这是我尝试编写的一段 OpenCL 代码,用于将两个向量相加:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

#define MAX_SOURCE_SIZE (0x100000)
//24/12
//data structure platform, device, context,program, kernel, command queue

void main()
{
    /////////////////////////////////////////////////////////////////////
    //PLATFORM QUERY:
    /////////////////////////////////////////////////////////////////////
    //clGetPlatformIDs(num_entries, platforms, &num_platforms);
    // two part: platform = NULL
    // malloc and get platforms*
    cl_uint num_platforms; //must be uint
    cl_platform_id *platforms;
    clGetPlatformIDs(5, NULL, &num_platforms);
    printf("There are %d platforms \n", num_platforms);
    platforms = (cl_platform_id*) malloc (num_platforms*sizeof(cl_platform_id)); 
    clGetPlatformIDs(5, platforms, &num_platforms);

    for(int i = 0; i < num_platforms; i++)
    {
        char name[40],vendor[40],version[40], profile[40],extensions[4096];
        clGetPlatformInfo(platforms[i],CL_PLATFORM_NAME, sizeof(name), &name, NULL);
        clGetPlatformInfo(platforms[i],CL_PLATFORM_VENDOR, sizeof(vendor), &vendor, NULL);
        clGetPlatformInfo(platforms[i],CL_PLATFORM_VERSION, sizeof(vendor), &version, NULL);
        clGetPlatformInfo(platforms[i],CL_PLATFORM_PROFILE, sizeof(vendor), &profile, NULL);
        //clGetPlatformInfo(platforms[i],CL_PLATFORM_EXTENSIONS, sizeof(vendor), &extensions, NULL);
        printf("Platform %d \n", i);
        printf("Name %s \n", name);
        printf("Vendor %s \n", vendor);
        printf("Version %s \n", version);
        printf("Profile %s \n", profile);
        //printf("Extension %s \n", extensions);
        printf("----------------------------------\n");
    }

    ////////////////////////////////////////////////////////////////
    //DEVICES QUERYING
    ////////////////////////////////////////////////////////////////
    cl_device_id* devices;
    cl_uint num_devices;
    cl_device_fp_config flag ;
    for(int i= 0; i< num_platforms; i++)
    {
        printf("Platform %d has:\n",i);
        clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 3, NULL, &num_devices);
        devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
        clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
        char name[40];
        for(int j=0; j < num_devices; j++)
        {
            int err= clGetDeviceInfo(devices[j],CL_DEVICE_NAME,sizeof(name),name,NULL);
            if (err<0)
            {
                //printf("Error querying devices name\n");
            }
            else
            {
                printf("Device name %s \n", name);
            }
            err= clGetDeviceInfo(devices[j],CL_DEVICE_NAME,sizeof(flag),&flag,NULL);
            if (flag & CL_FP_DENORM)
            {
                printf("This device support denormalized number \n");
            }
        }
        printf("-----------------------------------\n");
    }

    ///////////////////////////////////////////////////////
    //CONTEXT QUERYING AND CREATING
    ////////////////////////////////////////////////////////
    //NOTE clCreateContext returns cl_context instead of errors
    //REF_COUNT if very important in the future

    //create context for GPU
    cl_context context;
    cl_uint ref_count;
    cl_int err;
    char name[40];
    context= clCreateContext(NULL,1,&devices[0], NULL,NULL,&err);
    clGetContextInfo(context,CL_CONTEXT_REFERENCE_COUNT,sizeof(ref_count), &ref_count, NULL);
    printf("Original reference count is %d \n",ref_count);
    /*clRetainContext(context);
    clGetContextInfo(context,CL_CONTEXT_REFERENCE_COUNT,sizeof(ref_count), &ref_count, NULL);
    printf("Incremented reference count is %d \n",ref_count);
    clReleaseContext(context);
    clGetContextInfo(context,CL_CONTEXT_REFERENCE_COUNT,sizeof(ref_count), &ref_count, NULL);
    printf("Decremented reference count is %d \n",ref_count);*/

    ////////////////////////////////////////////////////////
    //Create programme
    ///////////////////////////////////////////////////////

    size_t program_size;
    err=0;
    cl_program program;
    char* program_buffer;

    FILE* program_handle = fopen("kernel.cl","r"); 

    //More recommendable than source code???
    program_buffer = (char*)malloc(MAX_SOURCE_SIZE);
    program_size = fread( program_buffer, 1, MAX_SOURCE_SIZE, program_handle);
    fclose( program_handle );
    program = clCreateProgramWithSource(context,1,(const char**) &program_buffer,
        (size_t*)&program_size, &err);


    ////////////////////////////////////////////////////////
    //Build Program
    /////////////////////////////////////////////////////// 
    //const char options[] = "-cl-finite-math-only -cl-no-signed-zeros";
    char* program_log;
    size_t log_size;

    err= clBuildProgram(program, 1 , devices, NULL, NULL, NULL);
    if(err < 0) //debug , printing log
    {
        clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
        program_log = (char*) malloc(log_size+1);
        program_log[log_size] = '\0'; 
        clGetProgramBuildInfo(program,devices[0],CL_PROGRAM_BUILD_LOG,log_size,
            program_log,NULL);
        printf("%s\n",program_log);
        free(program_log);
        //exit(1);
    }

    ///////////////////////////////////////////////////////////////////////////////////
    //create kernel
    ///////////////////////////////////////////////////////////////////////////////////
    cl_uint num_kernels;
    cl_kernel kernel;
    char kernel_name[40];
    kernel = clCreateKernel(program,"add",&err);
    if (err<0)
    {
        perror("could not found any kernels\n");
    }
    //kernels = (cl_kernel*)malloc(num_kernels*sizeof(cl_kernel));
    //clCreateKernelsInProgram(program, num_kernels, kernels, NULL);

    ///FOR REFERNECE
    //for(int i=0; i<num_kernels; i++)
    //{
        clGetKernelInfo(kernel,CL_KERNEL_FUNCTION_NAME,sizeof(kernel_name),kernel_name,NULL);
        printf("Kernel function: %s \n",kernel_name);
    //}


    /////////////////////////////////////////////////////
    //Create command queue
    /////////////////////////////////////////////////////
    cl_command_queue queue = clCreateCommandQueue(context, devices[0],CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,&err);
    if (err < 0)
    {
        printf("Couldn't create command queue \n");
        exit(1);
    }
    clEnqueueTask(queue, kernel, 0, NULL, NULL);//only enqueue



    //////////////////////////////////////////
    unsigned int n= 1000; 
    int* h_a;
    int* h_b;
    int* h_c;
    cl_mem d_a;
    cl_mem d_b;
    cl_mem d_c;

    h_a = (int*) malloc(n*sizeof(int));
    h_b = (int*) malloc(n*sizeof(int));
    h_c = (int*) malloc(n*sizeof(int));

    for(int i=0; i< n; i++)
    {
        h_a[i]= 1;//sinf(i)*sinf(i);
        h_b[i]= 1;//cosf(i)*cosf(i);
    }

    d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(h_a),NULL,NULL);
    d_b = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(h_a),NULL,NULL);
    d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY|CL_MEM_COPY_HOST_PTR,sizeof(h_a),NULL,NULL);

    err = clEnqueueWriteBuffer(queue,d_a,CL_TRUE,0,sizeof(h_a),h_a,0, NULL, NULL);
    err |= clEnqueueWriteBuffer(queue,d_b,CL_TRUE,0,sizeof(h_b),h_a,0, NULL, NULL);

    //////set argument
    err= clSetKernelArg(kernel,0,sizeof(cl_mem),&d_a);
    err= clSetKernelArg(kernel,1,sizeof(cl_mem),&d_b);
    err= clSetKernelArg(kernel,2,sizeof(cl_mem),&d_c);
    err= clSetKernelArg(kernel,3,sizeof(unsigned int),&n);

    ///////////////
    size_t globalsize, localsize;
    localsize=64;
    globalsize=ceil(n/(float)localsize)*localsize;
    err= clEnqueueNDRangeKernel(queue,kernel,1, NULL,&globalsize,&localsize,0,NULL,NULL);
    ////////////////////////
    clFinish(queue);

    err=clEnqueueReadBuffer(queue, d_c,CL_TRUE, 0, sizeof(h_c), h_c, 0 , NULL, NULL);

    for(int i = 0; i< n; i++)
    {
        printf(" h_c[%d] = %d \n", i, h_c[i]);
    }

    clReleaseMemObject(d_a);
    clReleaseMemObject(d_b);
    clReleaseMemObject(d_c);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    clReleaseKernel(kernel);

    free(h_a);
    free(h_b);
    free(h_c);



    getchar();
}

这是我的 kernel.cl

/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i below n.
 * Each work-item handles the element selected by its global id in
 * dimension 0; work-items whose id is not below n do nothing.
 */
__kernel void add(__global int * a, __global int *b, __global int* c, const unsigned n)
{
    int gid = get_global_id(0);

    /* Guard for work-items whose id falls outside the n elements. */
    if (gid >= n)
    {
        return;
    }

    c[gid] = a[gid] + b[gid];
}

运行这段代码后,我只得到垃圾值,例如对所有 i 都有 h_c[i] = -842150451。请帮我修复它。谢谢!

4

1 回答 1

1

这个表达式是不正确的:

sizeof(h_a)

应该是这样的:

n * sizeof(int)

实际上 h_a 只是一个指针,所以 sizeof(h_a) 是指针本身的大小(即 sizeof(int*),通常为 4 或 8 字节),而不是整个数组的大小 => 你只有一到两个元素的空间。

于 2012-12-28T12:27:57.280 回答