0

程序编译时没有报告任何错误,但它似乎没有调用我内核中的辅助函数。

该函数对两个向量数组逐元素计算点积,并应把结果写入一个浮点数组,但结果数组仍保持其默认值 (0.0)。

我遗漏了什么?按照我调用 clEnqueueWriteBuffer / clEnqueueReadBuffer 的顺序,它应该能正常工作,还是说我传入了错误的参数?

这是kernel.cl

// Integer alias used for element counts. Note that, despite its name, the
// underlying type is unsigned (OpenCL C specifies `unsigned long` as 64 bits wide).
typedef unsigned long int64;
// Floating-point alias used for vector components and results.
typedef float fp32;

// Three-component vector of fp32 values, also available as `Vec3`.
// NOTE(review): the host program declares a struct with the same three float
// fields; the two presumably need to keep an identical layout — confirm before
// changing either one.
typedef struct Vector3
{
    fp32 x;
    fp32 y;
    fp32 z; 
}Vec3;


// Computes the element-wise dot product of two Vec3 arrays.
//   vec1, vec2 : input arrays; elements [0, len) of each are read.
//   resultArr  : output array; elements [0, len) are overwritten with
//                vec1[i] . vec2[i].
//   len        : number of elements to process.
// The loop covers the whole range serially, so every work-item that calls this
// function processes all `len` elements.
void DotProduct(__global Vec3* vec1,__global Vec3* vec2,__global fp32* resultArr,int64 len)
{
    // The index uses the same unsigned 64-bit type as `len`: a plain `int`
    // would be converted for every comparison and would overflow for lengths
    // above INT_MAX.
    for(int64 i=0;i<len;i++)
    {
        resultArr[i] = (vec1[i].x * vec2[i].x) + (vec1[i].y * vec2[i].y) + (vec1[i].z * vec2[i].z);
    }
}

// Kernel entry point: validates the array lengths and, when they are usable,
// writes the first `len1` element-wise dot products of vec1 and vec2 into
// resultArr.
//   vec1 / len1             : first input array and its element count.
//   vec2 / len2             : second input array and its element count.
//   resultArr / resultCount : output array and its capacity in elements.
// Work is done only when vec2 and resultArr both hold at least `len1` elements;
// otherwise the kernel returns without touching resultArr. Elements
// [len1, resultCount) are never written.
// The kernel does not partition work by work-item id, so each work-item runs
// the full loop; launch it with a global size of 1.
__kernel void CallDotProduct(__global Vec3* vec1,int64 len1,__global Vec3* vec2,int64 len2,__global fp32* resultArr,int64 resultCount)
{
    // The previous guard demanded len1 == resultCount exactly, so a result
    // buffer that was merely larger than needed (e.g. 4 inputs, 5 result slots)
    // silently skipped the computation and left the results at their defaults.
    if(len1<=len2 && len1<=resultCount)
    {
        DotProduct(vec1,vec2,resultArr,len1);
    }
}

源文件 (C++ 主程序)

#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_TARGET_OPENCL_VERSION 200
#define _CRT_SECURE_NO_DEPRECATE /*open with FILE*/
#include "AppCL.h"
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <memory>

// Number of elements in the array `x`: its total size in bytes divided by the
// size of its first element. Only meaningful for true arrays, not for pointers.
#define NELEMS(x)  (sizeof(x) / sizeof((x)[0]))

/// Collects descriptive attributes (name, vendor, version, profile, extensions)
/// of each OpenCL platform.
///
/// For every platform, one "<attribute> <value>\n" line per attribute is
/// appended to `details`, and the accumulated text is printed to std::cout
/// after each platform has been processed.
///
/// @param numPlatforms Number of entries in `platforms`.
/// @param platforms    Array of platform ids to query.
/// @param details      Receives the appended description lines.
void GetPlatformInfo(const cl_uint& numPlatforms, cl_platform_id* platforms, std::string& details)
{
    const std::string attributeNames[] = { "Name", "Vendor", "Version", "Profile", "Extensions" };
    const cl_platform_info attributeTypes[5] = { CL_PLATFORM_NAME,CL_PLATFORM_VENDOR,CL_PLATFORM_VERSION,CL_PLATFORM_PROFILE,
                                                CL_PLATFORM_EXTENSIONS };
    const size_t numAttributes = NELEMS(attributeTypes);

    for (size_t i = 0; i < numPlatforms; i++)
    {
        for (size_t j = 0; j < numAttributes; j++)
        {
            // First call only asks how many bytes the attribute needs.
            size_t infoSize = 0;
            if (clGetPlatformInfo(platforms[i], attributeTypes[j], 0, NULL, &infoSize) != 0 || infoSize == 0)
                continue; // attribute unavailable: skip it rather than read garbage

            // A vector owns the scratch buffer, so nothing leaks per iteration.
            std::vector<char> info(infoSize, '\0');
            if (clGetPlatformInfo(platforms[i], attributeTypes[j], infoSize, info.data(), NULL) != 0)
                continue;

            info.back() = '\0'; // guarantee termination before treating it as a C string
            details += attributeNames[j] + " " + info.data() + '\n';
        }
        std::cout << details << std::endl;
    }
}



/// Reads a whole file in binary mode and inserts its bytes at the front of
/// `*bytes`.
///
/// @param bytes      Destination vector; the file content is inserted before
///                   any elements it already holds.
/// @param pathDecode Path of the file to read.
/// @param fileSize   Receives the file size in bytes, or 0 when the file cannot
///                   be opened or its size cannot be determined (in which case
///                   `*bytes` is left untouched).
void ReadBytes(std::vector<char>* bytes, const std::string& pathDecode, unsigned long& fileSize)
{
    fileSize = 0;

    // Automatic storage: the stream is closed on every exit path.
    std::ifstream file(pathDecode, std::ios::binary);
    if (!file.is_open())
        return;

    // Keep whitespace bytes: istream_iterator uses formatted extraction.
    file.unsetf(std::ios::skipws);

    file.seekg(0, std::ios::end);
    const std::streamoff length = file.tellg();
    file.seekg(0, std::ios::beg);
    if (length < 0 || !file)
        return; // tellg() failed; converting -1 would request an absurd reservation

    fileSize = static_cast<unsigned long>(length);
    bytes->reserve(bytes->size() + fileSize);
    bytes->insert(bytes->begin(), std::istream_iterator<char>(file), std::istream_iterator<char>());
}

/// Reads a whole file in binary mode and inserts its text at the front of
/// `content`.
///
/// @param content  Destination string; the file text is inserted before any
///                 characters it already holds.
/// @param filename Path of the file to read.
/// @return 0 when `content` holds more than one character afterwards, 1
///         otherwise (which includes the case where the file could not be
///         opened and `content` was empty or a single character).
int Program::ParseFile(std::string& content, const std::string& filename)
{
    // Automatic storage: no leak if reserve() or insert() throws.
    std::ifstream openFile(filename, std::ios::binary);

    if (openFile.is_open())
    {
        openFile.seekg(0, std::ios::end);
        const std::streamoff fileSize = openFile.tellg();
        openFile.seekg(0, std::ios::beg);

        // The size is only a capacity hint; skip it when it is unknown.
        if (fileSize > 0)
            content.reserve(content.size() + static_cast<size_t>(fileSize));

        content.insert(content.begin(), std::istreambuf_iterator<char>(openFile), std::istreambuf_iterator<char>());
    }

    return content.size() <= 1 ? 1 : 0;
}
/// Prints diagnostic text for a failed OpenCL call.
///
/// The program's build log for `device` is printed when one is available;
/// otherwise "Error: <status>" is printed using the status code supplied by
/// the caller.
///
/// @param status  Error code returned by the OpenCL call that failed.
/// @param program Program whose build log is queried.
/// @param device  Device the build log is requested for.
void GetMessageError(cl_int status, cl_program program, cl_device_id device)
{
    // The log queries use their own status variable so the caller's error code
    // is still intact for the fallback message below.
    size_t length = 0;
    cl_int logStatus = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &length);

    std::string message;
    if (logStatus == 0 && length > 0)
    {
        std::vector<char> buffer(length, '\0');
        logStatus = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, length, buffer.data(), NULL);
        if (logStatus == 0)
        {
            buffer.back() = '\0';    // guarantee termination
            message = buffer.data(); // stops at the terminator, so an empty log stays empty
        }
    }

    if (!message.empty())
        std::cout << message << std::endl;
    else
        std::cout << "Error: " << status << std::endl;
}

/// Reports a failed OpenCL call: when `status` is non-zero, prints `message`
/// and then the details produced by GetMessageError. Does nothing when
/// `status` is zero.
void CheckErrorCode(cl_int status, cl_program& program, cl_device_id& devices, const std::string& message)
{
    if (status == 0)
        return;

    std::cout << message << std::endl;
    GetMessageError(status, program, devices);
}


/// Selects the first available OpenCL platform.
///
/// @param platform        Receives the first platform id on success; left
///                        unchanged when no platform can be obtained.
/// @param numberPlatforms Receives the number of available platforms.
/// @param platforms       Unused. It is passed by value, so nothing assigned to
///                        it could reach the caller; kept for interface
///                        compatibility.
void Program::GetPlatform(cl_platform_id& platform, cl_uint& numberPlatforms, cl_platform_id* platforms)
{
    (void)platforms;

    // OpenCL reports success as 0 (CL_SUCCESS) and errors as negative values,
    // so failure must be detected with "!= 0"; it can never equal 1.
    cl_int err = clGetPlatformIDs(0, NULL, &numberPlatforms);
    if (err != 0)
    {
        std::cout << "Failed Platforms not found" << std::endl;
        return;
    }

    if (numberPlatforms == 0)
        return;

    // One element per platform; the vector frees itself on every exit path.
    std::vector<cl_platform_id> found(numberPlatforms);
    err = clGetPlatformIDs(numberPlatforms, found.data(), NULL);
    if (err != 0 || found[0] == nullptr)
    {
        std::cout << "Error Obtaining platformId" << std::endl;
        return;
    }

    platform = found[0];
}

/// Obtains the device ids of `platform`, preferring GPU devices and falling
/// back to CPU devices when no GPU is available.
///
/// @param devices  Receives a new[]-allocated array of device ids. The array
///                 always has at least one element; when no device could be
///                 obtained that element is null, so later OpenCL calls fail
///                 with an error instead of reading past an empty array.
///                 The caller owns the array and must release it with delete[].
/// @param platform Platform to query.
void Program::GetDeviceIDs(cl_device_id*& devices, const cl_platform_id& platform)
{
    cl_uint deviceCount = 0;
    cl_int err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceCount);
    const bool useGpu = (err == 0 && deviceCount > 0);

    if (!useGpu)
    {
        std::cout << "No GPU device available." << std::endl;
        std::cout << "Choose CPU as default device." << std::endl;
        deviceCount = 0;
        err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceCount);
        if (err != 0)
            deviceCount = 0;
    }

    // Element count, not byte count: new[] already multiplies by the element
    // size. The trailing () zero-initialises every entry.
    devices = new cl_device_id[deviceCount > 0 ? deviceCount : 1]();

    if (deviceCount == 0)
    {
        std::cout << "No CPU device available either." << std::endl;
        return;
    }

    err = clGetDeviceIDs(platform, useGpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, deviceCount, devices, NULL);
    if (err != 0)
    {
        std::cout << "Error obtaining device ids: " << err << std::endl;
    }
}

/// Creates an OpenCL context on `platform` for the first entry of `devices`
/// and stores it in `context`. Prints a message when creation reports an error.
void Program::GetContext(cl_context& context, const cl_platform_id& platform, cl_device_id*& devices)
{
    cl_int errorCode = 0;

    // Zero-terminated property list that binds the context to the platform.
    cl_context_properties properties[] = {
        CL_CONTEXT_PLATFORM,
        reinterpret_cast<cl_context_properties>(platform),
        0
    };

    context = clCreateContext(properties, 1, devices, NULL, NULL, &errorCode);
    if (errorCode == 0)
        return;

    std::cout << "Error creating context for device" << std::endl;
}

/// Creates a command queue with default properties on the first entry of
/// `devices` and stores it in `commandQueue`.
///
/// A message is printed when creation fails, matching GetContext; previously
/// the error code was discarded, so a failure only surfaced later as
/// unexplained enqueue errors.
void Program::GetCommandQueue(cl_command_queue& commandQueue, const cl_context& context, cl_device_id*& devices)
{
    if (devices == nullptr)
    {
        std::cout << "Error creating command queue for device" << std::endl;
        return;
    }

    cl_int err = 0;
    commandQueue = clCreateCommandQueue(context, devices[0], 0, &err);
    if (err != 0)
    {
        std::cout << "Error creating command queue for device" << std::endl;
    }
}


/// Reads a whole file into a newly allocated, NUL-terminated buffer.
///
/// @param filePath Path of the file to read (opened in binary mode).
/// @param content  On success, receives a new[]-allocated buffer holding the
///                 file bytes followed by a terminating '\0'. The caller owns
///                 it and must release it with delete[].
/// @param filesize On success, receives the number of bytes read (the
///                 terminator is not counted).
/// @return 0 on success; 1 when the file cannot be opened, sized or fully
///         read. On failure `content` and `filesize` are left unchanged.
int readfilec(const char* filePath, const char*& content, size_t& filesize)
{
    // The file is closed with fclose on every exit path. A FILE* must never be
    // passed to `delete`: it was not allocated with `new`.
    std::unique_ptr<FILE, int (*)(FILE*)> file(fopen(filePath, "rb"), &fclose);
    if (!file)
        return 1;

    if (fseek(file.get(), 0, SEEK_END) != 0)
        return 1;
    const long end = ftell(file.get()); // -1 on error
    if (end < 0 || fseek(file.get(), 0, SEEK_SET) != 0)
        return 1;

    const size_t size = static_cast<size_t>(end);
    std::unique_ptr<char[]> buffer(new char[size + 1]);

    if (fread(buffer.get(), sizeof(char), size, file.get()) != size)
        return 1; // short read: the buffer is released automatically

    buffer[size] = '\0'; // the extra byte makes the text usable as a C string

    content = buffer.release();
    filesize = size;
    return 0;
}

/// Loads OpenCL source text from a file and creates a program object from it.
///
/// @param filePath   Path of the source file.
/// @param SourceStr  Receives the buffer allocated by readfilec (owned by the
///                   caller).
/// @param sourceSize sourceSize[0] receives the source length in bytes; the
///                   array is passed to OpenCL as the per-string length list.
/// @param context    Context the program is created in.
/// @param program    Receives the created program. Left unchanged when the
///                   file cannot be read.
template<size_t size>
void Program::CreateProgramWithSource(const char*& filePath, const char*& SourceStr, size_t(&sourceSize)[size], cl_context& context, cl_program& program)
{
    // readfilec returns 0 on success; stop here on failure instead of handing
    // an invalid source pointer to OpenCL.
    if (readfilec(filePath, SourceStr, sourceSize[0]) != 0 || SourceStr == nullptr)
    {
        std::cout << "File could not be parsed" << std::endl;
        return;
    }
    std::cout << "File Parsed" << std::endl;

    // Echo the whole source. Dereferencing the pointer printed only its first
    // character; write() with an explicit length does not rely on a terminator.
    std::cout.write(SourceStr, static_cast<std::streamsize>(sourceSize[0]));
    std::cout << std::endl;

    cl_int status = 0;
    program = clCreateProgramWithSource(context, 1, &SourceStr, sourceSize, &status);

    if (status != 0)
    {
        std::cout << "Program couldnt be created" << std::endl;
        std::cout << status << std::endl;
    }
}

/// Compiles and links `program`, storing the result code in the `status`
/// member and printing the build log on failure.
///
/// @param program     Program to build.
/// @param deviceCount Number of entries of `devices` to build for. With 0 the
///                    device list is omitted and OpenCL builds for every
///                    device associated with the program.
/// @param devices     Devices to build for.
/// @param oclVersion  Build options passed to the compiler (for example
///                    "-cl-std=CL2.0"); may be null for no options.
///
/// Both `deviceCount` and `oclVersion` used to be ignored (a count of 1 and no
/// options were hard-coded). Options the device compiler rejects now surface
/// as a build failure together with the build log.
void Program::BuildProgram(cl_program& program, const cl_uint& deviceCount, cl_device_id*& devices, const char* oclVersion)
{
    // A non-null device list combined with a zero count is an invalid
    // combination for clBuildProgram, so the list is dropped in that case.
    cl_device_id* deviceList = (deviceCount > 0) ? devices : NULL;

    status = clBuildProgram(program, deviceCount, deviceList, oclVersion, NULL, NULL);
    if (status != 0)
    {
        std::cout << "Program couldnt be built" << std::endl;
        GetMessageError(status, program, devices[0]);
    }
}

// Host-side three-component float vector, also available as `Vec3`.
// NOTE(review): kernel.cl declares a struct with the same three float fields;
// arrays of this type are handed to OpenCL buffers, so the two layouts
// presumably have to stay identical — confirm before changing either one.
typedef struct Vector3
{
    float x;
    float y;
    float z;

}Vec3;

int main()
{

    cl_uint numPlatf = 0;


    Program p1;
    p1.GetPlatform(p1.platform, numPlatf, p1.platforms);
    p1.GetDeviceIDs(p1.devices, p1.platform);
    p1.GetContext(p1.context, p1.platform, p1.devices);
    p1.GetCommandQueue(p1.commandQueue, p1.context, p1.devices);
    p1.CreateProgramWithSource(p1.filePath, p1.sourceCode, p1.sourceSize, p1.context, p1.program);
    p1.BuildProgram(p1.program, 1, p1.devices, "-cl-std=CL2.0");


    std::string s;
    unsigned long long n = ULLONG_MAX;
    unsigned long long m = 0;

    unsigned long long length1 = 4;
    unsigned long long length2 = 5;
    unsigned long long resultlen = 5;

    Vec3* v1 = (Vec3*)calloc(length1,sizeof(Vec3*) * length1);
    Vec3* v2 = (Vec3*)calloc(length1, sizeof(Vec3*) * length2);
    float* res = (float*)calloc(resultlen, sizeof(float*) * resultlen);

    for (decltype(length1) i = 0; i < resultlen; i++)
    {

        if (resultlen > length1)
            if (i < resultlen - 1)
            {
                v1[i].x = (float)(rand() % 100) + 1;
                v1[i].y = (float)(rand() % 100) + 1;
                v1[i].z = (float)(rand() % 100) + 1;

                std::cout << i + 1 << ") vec1" << std::endl;
                std::cout << "x:" << v1[i].x << " y:" << v1[i].y << " z:" << v1[i].z << std::endl;
            }

        v2[i].x= (float)(rand() % 100) + 1;
        v2[i].y= (float)(rand() % 100) + 1;
        v2[i].z = (float)(rand() % 100) + 1;

        std::cout <<i+1 <<") vec2" << std::endl;
        std::cout << "x:" << v2[i].x << " y:" << v2[i].y << " z:" << v2[i].z << std::endl;


        res[i] = 0.0f;

        std::cout << i + 1 << ") result: "<< res[i] << std::endl;

    }

    cl_mem vec1MemObject = clCreateBuffer(p1.context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,sizeof(Vec3*)*length1,&v1,&p1.status);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 0");
    cl_mem vec2MemObject = clCreateBuffer(p1.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(Vec3*) * length2, &v2, &p1.status);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 1");
    cl_mem resBuffer = clCreateBuffer(p1.context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(res) * resultlen, &res, &p1.status);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 2");

    cl_kernel dotProductKernel = clCreateKernel(p1.program,"CallDotProduct",&p1.status);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating kernel 0");

    p1.status = clSetKernelArg(dotProductKernel,0,sizeof(cl_mem),(void*)&vec1MemObject);
    CheckErrorCode(p1.status,p1.program,p1.devices[0],"Failed to set Argument 0");

    p1.status = clSetKernelArg(dotProductKernel, 1, sizeof(length1), &length1);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 1");

    p1.status = clSetKernelArg(dotProductKernel, 2, sizeof(cl_mem), (void*)&vec2MemObject);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 2");

    p1.status = clSetKernelArg(dotProductKernel, 3, sizeof(length2), &length2);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 3");

    p1.status = clSetKernelArg(dotProductKernel, 4, sizeof(cl_mem), &resBuffer);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 4");

    p1.status = clSetKernelArg(dotProductKernel, 5, sizeof(resultlen), &resultlen);
    CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 5");

    size_t  globalWorkSize[1] = { 1 };
    size_t localWorkSize = 64;

    p1.status = clEnqueueReadBuffer(p1.commandQueue, vec1MemObject, CL_FALSE, 0, sizeof(Vec3*), &v1, 0, NULL, NULL);
    p1.status = clEnqueueReadBuffer(p1.commandQueue, vec2MemObject, CL_FALSE, 0, sizeof(Vec3*), &v2, 0, NULL, NULL);
    p1.status = clEnqueueWriteBuffer(p1.commandQueue, resBuffer, CL_FALSE, 1, sizeof(res) * resultlen, &res, 0, NULL, NULL);
    p1.status = clEnqueueNDRangeKernel(p1.commandQueue, dotProductKernel, 1, NULL, globalWorkSize, &localWorkSize, 0, NULL, NULL);

    for (decltype(length1) i = 0; i < resultlen; i++)
    {



        if (resultlen > length1)
            if (i < resultlen - 1)
            {
                std::cout <<i + 1 << ") vec1" << std::endl;
                std::cout << "x:" << v1[i].x << " y:" << v1[i].y << " z:" << v1[i].z << std::endl;

            }
        std::cout << '*' << std::endl;
        std::cout << i + 1 << ") vec2" << std::endl;
        std::cout << "x:" << v2[i].x << " y:" << v2[i].y << " z:" << v2[i].z << std::endl;
        std::cout << '=' << std::endl;
        std::cout << res[i] << std::endl;
    }
  return 0;
}

AppCL.h

#ifndef APPLICATIONCL_H
#define APPLICATIONCL_H

#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <iterator>
#include <fstream>
#include <sstream>
#include <CL/cl.h>

/// Bundles the OpenCL objects and source-file state used by the application,
/// together with the helper steps that populate them.
///
/// All data members are public and are passed back into the member functions
/// by reference. The class defines no destructor: the arrays stored in
/// `devices` and `sourceCode` are allocated with new[] by the setup code, and
/// the OpenCL handles are not released here, so whoever owns the instance is
/// responsible for releasing them.
class Program
{
    friend int readfilec(const char* filePath,const char*& content, size_t& filesize);
    friend void CheckErrorCode(cl_int status, cl_program& program, cl_device_id& devices, const std::string& message);
    friend void GetMessageError(cl_int status, cl_program program, cl_device_id device);
public:
    // Every member has an in-class initialiser so that no pointer or handle is
    // ever left indeterminate (fileBytes previously was).
    cl_int status = 0;                      // result code of the most recent OpenCL call stored here
    cl_platform_id platform = nullptr;      // selected platform
    cl_device_id* devices = nullptr;        // device id array filled by GetDeviceIDs
    cl_context context = nullptr;
    cl_command_queue commandQueue = nullptr;
    cl_program program = nullptr;
    size_t sourceSize[1] = { 0 };           // length in bytes of the loaded kernel source
    const char* sourceCode = nullptr;       // kernel source text filled by CreateProgramWithSource
    std::string strSource;
    const char* filePath = "kernel.cl";     // path of the kernel source file
    unsigned  char* fileBytes = nullptr;
    cl_platform_id* platforms = nullptr;

    /// Sets the path of the kernel source file; the pointer is stored as-is.
    inline void SetFilePath(const char* path) { filePath = path; }
    /// Returns the path of the kernel source file.
    inline const char* GetFilePath() const { return filePath; }
    void GetPlatform(cl_platform_id& platform, cl_uint& numberPlatforms, cl_platform_id* platforms);
    void GetDeviceIDs(cl_device_id*& devices, const cl_platform_id& platform);
    void GetContext(cl_context& context, const cl_platform_id& platform, cl_device_id*& devices);
    void GetCommandQueue(cl_command_queue& commandQueue, const cl_context& context, cl_device_id*& devices);
    template<size_t size>
    void CreateProgramWithSource(const char*& filePath, const char*& SourceStr, size_t(&sourceSize)[size], cl_context& context, cl_program& program);
    void BuildProgram(cl_program& program, const cl_uint& deviceCount, cl_device_id*& devices, const char* oclVersion);

    int ParseFile(std::string& content, const std::string& filename);

    /// Default state: null handles, zero status, source path "kernel.cl".
    Program() = default;

private:


};

#endif // !APPLICATIONCL_H
4

0 回答 0