0

我试图构建我的 OpenCL 内核代码,但构建失败了。控制台上没有出现任何错误代码,只输出了一句 "Could not open file: C:\Users\?"。

通过部分注释掉,我发现了错误的位置。

以下是制造错误的部分。

err = clBuildProgram(program, 1, devices, "-cl-fast-relaxed-math", NULL, NULL);
CHECK_ERROR(err);

我认为内核代码本身没有问题,因为当我故意在内核代码中引入语法错误时,会收到另一条带有错误代码的消息:

"Could not open file: C:\Users\?[D:\OpenCLProjects\Exam03\Exam02\Exam01.c:79] OpenCL error -11"

下面是我的全部代码(先是 kernel.cl 中的内核,然后是主机端的 C 代码)。

/*
 * Element-wise integer vector addition.
 *
 * Each work-item reads its global index in dimension 0 and stores
 * A[index] + B[index] into C[index]. The kernel performs no bounds check.
 */
__kernel void vec_add(__global int* A, __global int* B, __global int* C) {
    const int gid = get_global_id(0);
    const int sum = A[gid] + B[gid];
    C[gid] = sum;
}
#define _CRT_SECURE_NO_WARNINGS
#include <CL/cl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>


#define NUM_OF_INT 16384

/*
 * Abort the program when an OpenCL status code signals failure.
 *
 * If `err` differs from CL_SUCCESS, prints the source file, line number and
 * the numeric error code, then terminates with EXIT_FAILURE.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves like
 * a single statement (safe inside an unbraced if/else). It must be followed
 * by a semicolon. `err` may be evaluated more than once, so pass a plain
 * variable rather than an expression with side effects.
 */
#define CHECK_ERROR(err) \
    do { \
        if ((err) != CL_SUCCESS) { \
            printf("[%s:%d] OpenCL error %d\n", __FILE__, __LINE__, (err)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)

char* get_source_code(const char* file_name, size_t * len);

int main()
{
    cl_int err;
    cl_uint num_platforms;
    cl_platform_id* platforms;
    
    cl_uint num_devices;
    cl_device_id* devices;

    cl_context context;

    cl_command_queue queue;

    cl_program program;

    char* kernel_source;
    size_t kernel_source_size;

    cl_kernel kernel_vec_add;

    cl_mem bufA, bufB, bufC;

    err = clGetPlatformIDs(0, NULL, &num_platforms);
    CHECK_ERROR(err);

    platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * num_platforms);
    err = clGetPlatformIDs(num_platforms, platforms, NULL);
    CHECK_ERROR(err);

    //
    size_t plat_name_size;
    clGetPlatformInfo(platforms[0], CL_PLATFORM_NAME, 0, NULL, &plat_name_size);
    char* plat_name = (char*)malloc(plat_name_size);
    clGetPlatformInfo(platforms[0], CL_PLATFORM_NAME, plat_name_size, plat_name, NULL);
    printf("%s\n",plat_name);
    //

    err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
    CHECK_ERROR(err);
    devices = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices);
    err = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
    CHECK_ERROR(err);

    //
    size_t dev_name_size;
    clGetDeviceInfo(devices[0], CL_DEVICE_NAME, 0, NULL, &dev_name_size);
    char* dev_name = (char*)malloc(dev_name_size);
    clGetDeviceInfo(devices[0], CL_DEVICE_NAME, dev_name_size, dev_name, NULL);
    printf("%s\n",dev_name);
    //

    context = clCreateContext(NULL, 1, &devices[0], NULL, NULL, &err);
    CHECK_ERROR(err);

    queue = clCreateCommandQueueWithProperties(context, devices[0], 0, &err);
    CHECK_ERROR(err);

    kernel_source = get_source_code("kernel.cl", &kernel_source_size);
    program = clCreateProgramWithSource(context, 1, &kernel_source, &kernel_source_size, &err);
    CHECK_ERROR(err);
    
    err = clBuildProgram(program, 1, devices, "-cl-fast-relaxed-math", NULL, NULL);
    CHECK_ERROR(err);
    
    kernel_vec_add = clCreateKernel(program, "vec_add", &err);
    CHECK_ERROR(err);
    


    srand(time(NULL));
    int* a = (int*)malloc(sizeof(int) * NUM_OF_INT);
    int* b = (int*)malloc(sizeof(int) * NUM_OF_INT);
    int* c = (int*)malloc(sizeof(int) * NUM_OF_INT);


    for (int i = 0; i < NUM_OF_INT; i++) a[i] = rand() % (INT_MAX / 2);
    for (int i = 0; i < NUM_OF_INT; i++) b[i] = rand() % (INT_MAX / 2);
    
    bufA = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int) * NUM_OF_INT, NULL, &err);
    CHECK_ERROR(err);
    bufB = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int) * NUM_OF_INT, NULL, &err);
    CHECK_ERROR(err);
    bufC = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int) * NUM_OF_INT, NULL, &err);
    CHECK_ERROR(err);
    
    err = clEnqueueWriteBuffer(queue, bufA, CL_TRUE, 0, sizeof(int) * NUM_OF_INT, a, 0, NULL, NULL);
    CHECK_ERROR(err);
    err = clEnqueueWriteBuffer(queue, bufB, CL_TRUE, 0, sizeof(int) * NUM_OF_INT, b, 0, NULL, NULL);
    CHECK_ERROR(err);
    
    
    err = clSetKernelArg(kernel_vec_add, 0, sizeof(cl_mem), &bufA);
    CHECK_ERROR(err);
    err = clSetKernelArg(kernel_vec_add, 1, sizeof(cl_mem), &bufB);
    CHECK_ERROR(err);
    err = clSetKernelArg(kernel_vec_add, 2, sizeof(cl_mem), &bufC);
    CHECK_ERROR(err);
    
    size_t global_size = NUM_OF_INT;
    size_t local_size = 1024;
    err = clEnqueueNDRangeKernel(
        queue, kernel_vec_add, 1, NULL,
        &global_size, &local_size,
        0, NULL, NULL);
    CHECK_ERROR(err);


    //clEnqueueCopyBuffer(queue, bufC, bufA, NULL, NULL, sizeof(int) * NUM_OF_INT, NULL, NULL, NULL);

    clFinish(queue);

    err = clEnqueueReadBuffer(queue, bufC, CL_TRUE, 0, sizeof(int) * NUM_OF_INT, c, 0, NULL, NULL);
    CHECK_ERROR(err);
    
    //for (int i = 0; i < NUM_OF_INT; i++) printf("%d = %d+%d\n", c[i],a[i],b[i]);
    printf("\n");
    
    return 0;
}


/*
 * Read an entire file into a newly allocated, NUL-terminated buffer.
 *
 * file_name: path of the file to read.
 * len:       if not NULL, receives the number of bytes read (excluding the
 *            terminating NUL).
 *
 * Returns the buffer; the caller owns it and must release it with free().
 * On any I/O or allocation failure a message is printed and the program
 * exits with EXIT_FAILURE.
 *
 * The file is opened in binary mode so that the size obtained from ftell()
 * matches the number of bytes fread() delivers. The reported length is the
 * count fread() actually returned, so no newline-based correction is needed
 * and no unread bytes of the buffer are ever inspected.
 */
char* get_source_code(const char* file_name, size_t* len) {
    FILE* file = fopen(file_name, "rb");
    if (file == NULL) {
        printf("[%s:%d] FAiled to open %s\n", __FILE__, __LINE__, file_name);
        exit(EXIT_FAILURE);
    }

    /* Determine the file size by seeking to the end. */
    if (fseek(file, 0, SEEK_END) != 0) {
        printf("[%s:%d] Failed to seek in %s\n", __FILE__, __LINE__, file_name);
        exit(EXIT_FAILURE);
    }
    long file_size = ftell(file);
    if (file_size < 0) {
        printf("[%s:%d] Failed to get size of %s\n", __FILE__, __LINE__, file_name);
        exit(EXIT_FAILURE);
    }
    rewind(file);

    /* One extra byte for the terminating NUL. */
    char* source_code = (char*)malloc((size_t)file_size + 1);
    if (source_code == NULL) {
        printf("[%s:%d] Failed to allocate memory for %s\n", __FILE__, __LINE__, file_name);
        exit(EXIT_FAILURE);
    }

    size_t length = fread(source_code, 1, (size_t)file_size, file);
    if (length != (size_t)file_size && ferror(file)) {
        printf("[%s:%d] Failed to read %s\n", __FILE__, __LINE__, file_name);
        exit(EXIT_FAILURE);
    }
    fclose(file);

    source_code[length] = '\0';
    if (len != NULL) {
        *len = length;
    }
    return source_code;
}

这是我的完整源代码

4

0 回答 0