程序在编译和构建时没有报告任何错误,但运行时似乎并没有调用我内核中的辅助函数。
该函数对两个向量数组逐元素计算点积,并应把结果写入一个浮点数组,但结果数组始终保持其初始值 (0.0)。
我遗漏了什么?按照我调用 clEnqueueWriteBuffer / clEnqueueReadBuffer 的顺序,它应该可以正常工作——还是说我传入了某些错误的参数?
以下是 kernel.cl:
/* Scalar aliases used by the kernels below.
 * Note: despite its name, int64 is an UNSIGNED 64-bit integer here. */
typedef float fp32;
typedef unsigned long int64;

/* Three-component single-precision vector (x, y, z stored in that order). */
struct Vector3
{
    fp32 x;
    fp32 y;
    fp32 z;
};
typedef struct Vector3 Vec3;
/*
 * Element-wise dot product of two vector arrays.
 *
 * For every index i in [0, len): resultArr[i] = vec1[i] . vec2[i].
 *
 * vec1, vec2  input arrays; each must hold at least len elements
 * resultArr   output array; must hold at least len elements
 * len         number of element pairs to process
 *
 * The work is split across the work-items of dimension 0 with a strided loop:
 * work-item k handles indices k, k + N, k + 2N, ... where N is the global size.
 * When launched with a single work-item this is exactly a serial loop over
 * all len elements.
 */
void DotProduct(__global Vec3* vec1,__global Vec3* vec2,__global fp32* resultArr,int64 len)
{
    /* Subtract the global offset so the first work-item always starts at index 0. */
    const int64 first = (int64)(get_global_id(0) - get_global_offset(0));
    const int64 stride = (int64)get_global_size(0);

    /* 64-bit index: the original `int` counter could not cover a 64-bit length. */
    for (int64 i = first; i < len; i += stride)
    {
        resultArr[i] = (vec1[i].x * vec2[i].x) + (vec1[i].y * vec2[i].y) + (vec1[i].z * vec2[i].z);
    }
}
/*
 * Kernel entry point: validates the array lengths and runs DotProduct.
 *
 * vec1, len1          first input array and its element count
 * vec2, len2          second input array and its element count
 * resultArr           output array
 * resultCount         number of elements resultArr can hold
 *
 * The number of pairs processed is min(len1, len2). Nothing is written when
 * resultArr is too small to hold that many results.
 *
 * The previous guard only ran when resultCount == len1 and len1 <= len2, so
 * e.g. (len1 = 4, len2 = 5, resultCount = 5) silently did no work at all.
 * Every combination accepted before is still accepted and yields the same
 * element count.
 */
__kernel void CallDotProduct(__global Vec3* vec1,int64 len1,__global Vec3* vec2,int64 len2,__global fp32* resultArr,int64 resultCount)
{
    const int64 pairCount = (len1 < len2) ? len1 : len2;

    if (pairCount <= resultCount)
    {
        DotProduct(vec1, vec2, resultArr, pairCount);
    }
}
主机端源文件(C++):
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#define CL_TARGET_OPENCL_VERSION 200
#define _CRT_SECURE_NO_DEPRECATE /*open with FILE*/
#include "AppCL.h"
#include <cstdio>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <memory>
// Element count of a true array: total size in bytes divided by the size of one element.
// Only valid for arrays; applied to a pointer it yields sizeof(pointer) / sizeof(element).
#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))
void GetPlatformInfo(const cl_uint& numPlatforms, cl_platform_id* platforms, std::string& details)
{
std::string attributeNames[] = { "Name", "Vendor", "Version", "Profile", "Extensions" };
const cl_platform_info attributeTypes[5] = { CL_PLATFORM_NAME,CL_PLATFORM_VENDOR,CL_PLATFORM_VERSION,CL_PLATFORM_PROFILE,
CL_PLATFORM_EXTENSIONS };
size_t infoSize;
int numAttributes = NELEMS(attributeTypes);
char* info;
std::string infoContent = "";
for (size_t i = 0; i < numPlatforms; i++)
{
for (size_t j = 0; j < numAttributes; j++)
{
clGetPlatformInfo(platforms[i], attributeTypes[j], 0, NULL, &infoSize);
info = new char[infoSize];
// get platform attribute value
clGetPlatformInfo(platforms[i], attributeTypes[j], infoSize, info, NULL);
infoContent.assign(info, info + strlen(info));
details += attributeNames[j] + " " + infoContent + '\n';
}
std::cout << details << std::endl;
}
}
/// @brief Reads a whole file as raw bytes.
///
/// @param bytes      Destination vector; the file content is inserted at its front.
///                   Nothing happens when null.
/// @param pathDecode Path of the file to read (opened in binary mode).
/// @param fileSize   Receives the file size in bytes, or 0 when the file cannot be
///                   opened or measured.
///
/// The stream now lives on the stack, so it is closed on every path, and a missing
/// file no longer turns the -1 from tellg() into a huge reserve() request.
void ReadBytes(std::vector<char>* bytes, const std::string& pathDecode, unsigned long& fileSize)
{
    fileSize = 0;
    if (bytes == nullptr)
        return;

    std::ifstream file(pathDecode, std::ios::binary);
    if (!file.is_open())
        return;

    file.seekg(0, std::ios::end);
    const std::streamoff endPos = file.tellg();
    file.seekg(0, std::ios::beg);
    if (endPos < 0 || !file)
        return;

    fileSize = static_cast<unsigned long>(endPos);
    bytes->reserve(bytes->size() + static_cast<size_t>(endPos));
    // istreambuf_iterator reads unformatted characters, so whitespace is preserved.
    bytes->insert(bytes->begin(), std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>());
}
/// @brief Loads the content of a file into a string.
///
/// @param content  Destination string; the file content is inserted at its front.
/// @param filename Path of the file to read (opened in binary mode).
/// @return 0 when @p content holds more than one character afterwards, 1 otherwise
///         (file missing, unreadable, or effectively empty).
///
/// The stream is a stack object, so it is closed even if reserve()/insert() throw;
/// the old heap-allocated stream leaked in that case. A failed tellg() no longer
/// becomes an enormous reserve() request.
int Program::ParseFile(std::string& content, const std::string& filename)
{
    std::ifstream openFile(filename, std::ios::binary);
    if (openFile.is_open())
    {
        openFile.seekg(0, std::ios::end);
        const std::streamoff endPos = openFile.tellg();
        openFile.seekg(0, std::ios::beg);

        if (endPos > 0)
            content.reserve(content.size() + static_cast<size_t>(endPos));
        content.insert(content.begin(), std::istreambuf_iterator<char>(openFile), std::istreambuf_iterator<char>());
    }
    return content.size() <= 1 ? 1 : 0;
}
/// @brief Prints the program build log, or the numeric error code when no log exists.
///
/// @param status  The OpenCL error code that triggered the report.
/// @param program Program whose build log is requested (may be invalid or null).
/// @param device  Device the program was built for.
///
/// The incoming @p status is no longer overwritten by the build-log queries, so the
/// "Error: <code>" fallback shows the real failure. The log's NUL terminator is
/// stripped, so an empty log actually reaches the fallback.
void GetMessageError(cl_int status, cl_program program, cl_device_id device)
{
    std::string message;
    size_t length = 0;

    const cl_int sizeStatus = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
    if (sizeStatus == CL_SUCCESS && length > 0)
    {
        std::string log(length, '\0');
        const cl_int logStatus = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, length, &log[0], nullptr);
        if (logStatus == CL_SUCCESS)
        {
            // The reported length includes the terminator; keep only the text before it.
            const std::string::size_type terminator = log.find('\0');
            if (terminator != std::string::npos)
                log.resize(terminator);
            message = log;
        }
    }

    if (!message.empty())
        std::cout << message << std::endl;
    else
        std::cout << "Error: " << status << std::endl;
}
/// @brief Reports a failed OpenCL call; does nothing when @p status is 0.
///
/// @param status  Return code of the OpenCL call being checked.
/// @param program Program handle forwarded to GetMessageError.
/// @param devices Device handle forwarded to GetMessageError.
/// @param message Text printed before the detailed error information.
void CheckErrorCode(cl_int status, cl_program& program, cl_device_id& devices, const std::string& message)
{
    const bool succeeded = (status == 0);
    if (succeeded)
        return;

    std::cout << message << std::endl;
    GetMessageError(status, program, devices);
}
/// @brief Selects the first available OpenCL platform.
///
/// @param platform        Receives the first platform id; left unchanged on failure.
/// @param numberPlatforms Receives the number of platforms found (0 on failure).
/// @param platforms       Unused. The old code assigned a temporary array to this
///                        by-value pointer and freed it before returning, so the
///                        caller never saw it; kept for interface compatibility.
///
/// Fixes: errors are detected with `!= CL_SUCCESS` (the old `status == 1` test could
/// never match, OpenCL error codes are negative), the id array holds exactly
/// numberPlatforms entries instead of numberPlatforms * sizeof(id), and the second
/// query is checked.
void Program::GetPlatform(cl_platform_id& platform, cl_uint& numberPlatforms, cl_platform_id* platforms)
{
    (void)platforms;

    numberPlatforms = 0;
    cl_int status = clGetPlatformIDs(0, nullptr, &numberPlatforms);
    if (status != CL_SUCCESS || numberPlatforms == 0)
    {
        std::cout << "Failed Platforms not found" << std::endl;
        numberPlatforms = 0;
        return;
    }

    std::vector<cl_platform_id> available(numberPlatforms, nullptr);
    status = clGetPlatformIDs(numberPlatforms, available.data(), nullptr);
    if (status != CL_SUCCESS || available[0] == nullptr)
    {
        std::cout << "Error Obtaining platformId" << std::endl;
        return;
    }

    platform = available[0];
}
/// @brief Enumerates the devices of a platform, preferring GPUs and falling back to CPUs.
///
/// @param devices  Receives a new[]-allocated array of device ids that the caller must
///                 release with delete[]; set to nullptr when no device is found or
///                 the query fails.
/// @param platform Platform whose devices are enumerated.
///
/// Fixes: the array holds exactly deviceCount entries (the old code multiplied the
/// count by sizeof(cl_device_id)), and the "no device at all" case yields nullptr
/// instead of a zero-length array that later code would index.
void Program::GetDeviceIDs(cl_device_id*& devices, const cl_platform_id& platform)
{
    cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
    cl_uint deviceCount = 0;
    cl_int status = clGetDeviceIDs(platform, deviceType, 0, nullptr, &deviceCount);

    if (status != CL_SUCCESS || deviceCount == 0)
    {
        std::cout << "No GPU device available." << std::endl;
        std::cout << "Choose CPU as default device." << std::endl;
        deviceType = CL_DEVICE_TYPE_CPU;
        deviceCount = 0;
        status = clGetDeviceIDs(platform, deviceType, 0, nullptr, &deviceCount);
    }

    if (status != CL_SUCCESS || deviceCount == 0)
    {
        std::cout << "No OpenCL device available." << std::endl;
        devices = nullptr;
        return;
    }

    cl_device_id* found = new cl_device_id[deviceCount]();
    status = clGetDeviceIDs(platform, deviceType, deviceCount, found, nullptr);
    if (status != CL_SUCCESS)
    {
        std::cout << "Error obtaining device ids: " << status << std::endl;
        delete[] found;
        devices = nullptr;
        return;
    }

    devices = found;
}
/// @brief Creates an OpenCL context on @p platform for the first entry of @p devices.
///
/// @param context  Receives the handle returned by clCreateContext.
/// @param platform Platform the context is bound to.
/// @param devices  Device array; exactly one device is passed to clCreateContext.
///
/// A message is printed when clCreateContext reports a non-zero error code.
void Program::GetContext(cl_context& context, const cl_platform_id& platform, cl_device_id*& devices)
{
    cl_int errorCode = 0;
    const cl_context_properties platformHandle = reinterpret_cast<cl_context_properties>(platform);
    // Zero-terminated (key, value) list expected by clCreateContext.
    cl_context_properties properties[] = { CL_CONTEXT_PLATFORM, platformHandle, 0 };

    context = clCreateContext(properties, 1, devices, NULL, NULL, &errorCode);

    if (errorCode == 0)
        return;
    std::cout << "Error creating context for device" << std::endl;
}
/// @brief Creates a command queue with default properties on the first device.
///
/// @param commandQueue Receives the new queue, or nullptr when creation fails.
/// @param context      Context the queue belongs to.
/// @param devices      Device array; the queue is created for devices[0].
///
/// Unlike the previous version, a null device array is rejected instead of being
/// dereferenced, and the error code from clCreateCommandQueue is reported.
void Program::GetCommandQueue(cl_command_queue& commandQueue, const cl_context& context, cl_device_id*& devices)
{
    commandQueue = nullptr;
    if (devices == nullptr)
    {
        std::cout << "Error creating command queue: no device" << std::endl;
        return;
    }

    cl_int errorCode = CL_SUCCESS;
    commandQueue = clCreateCommandQueue(context, devices[0], 0, &errorCode);
    if (errorCode != CL_SUCCESS)
    {
        std::cout << "Error creating command queue: " << errorCode << std::endl;
    }
}
/// @brief Reads a whole file into a freshly allocated, NUL-terminated buffer.
///
/// @param filePath Path of the file to read (opened in binary mode).
/// @param content  On success receives a new[]-allocated buffer of filesize + 1
///                 bytes that the caller must release with delete[]; nullptr on failure.
/// @param filesize On success receives the number of bytes read (terminator
///                 excluded); 0 on failure.
/// @return 0 on success, 1 on any failure (null path, open, seek or read error).
///
/// Fixes: the fopen() result is checked, the stream is closed on every path, the
/// invalid `delete fp` after fclose() is gone, the buffer is NUL-terminated, and no
/// buffer is handed out when the read fails.
int readfilec(const char* filePath, const char*& content, size_t& filesize)
{
    content = nullptr;
    filesize = 0;
    if (filePath == nullptr)
        return 1;

    // The unique_ptr closes the stream on every return path.
    std::unique_ptr<FILE, int (*)(FILE*)> file(std::fopen(filePath, "rb"), &std::fclose);
    if (!file)
        return 1;

    if (std::fseek(file.get(), 0, SEEK_END) != 0)
        return 1;
    const long endPos = std::ftell(file.get());
    if (endPos < 0 || std::fseek(file.get(), 0, SEEK_SET) != 0)
        return 1;

    const size_t byteCount = static_cast<size_t>(endPos);
    std::unique_ptr<char[]> buffer(new char[byteCount + 1]);
    if (std::fread(buffer.get(), sizeof(char), byteCount, file.get()) != byteCount)
        return 1;
    buffer[byteCount] = '\0';

    filesize = byteCount;
    content = buffer.release(); // ownership passes to the caller
    return 0;
}
/// @brief Loads the kernel source file and creates an OpenCL program object from it.
///
/// @tparam size      Length of the @p sourceSize array (at least 1).
/// @param filePath   Path of the kernel source file.
/// @param SourceStr  Receives the buffer allocated by readfilec.
/// @param sourceSize sourceSize[0] receives the source length in bytes.
/// @param context    Context the program is created in.
/// @param program    Receives the program handle; left untouched when the file
///                   cannot be read.
///
/// Fixes: a failed read now stops the function instead of continuing with an
/// invalid source pointer, and the whole source is echoed rather than only its
/// first character (`*SourceStr`). The echo uses the explicit length, so it does
/// not depend on the buffer being NUL-terminated.
template<size_t size>
void Program::CreateProgramWithSource(const char*& filePath, const char*& SourceStr, size_t(&sourceSize)[size], cl_context& context, cl_program& program)
{
    static_assert(size >= 1, "sourceSize needs room for at least one length");

    const int readResult = readfilec(filePath, SourceStr, sourceSize[0]);
    if (readResult != 0 || SourceStr == nullptr)
    {
        std::cout << "File could not be parsed" << std::endl;
        return;
    }
    std::cout << "File Parsed" << std::endl;

    std::cout.write(SourceStr, static_cast<std::streamsize>(sourceSize[0]));
    std::cout << std::endl;

    cl_int status = 0;
    program = clCreateProgramWithSource(context, 1, &SourceStr, sourceSize, &status);
    if (status != 0)
    {
        std::cout << "Program couldnt be created" << std::endl;
        std::cout << status << std::endl;
    }
}
/// @brief Builds @p program for the first device and reports the build log on failure.
///
/// @param program     Program object to build.
/// @param deviceCount Accepted for interface compatibility; the build targets only
///                    devices[0], matching the single-device context GetContext creates.
/// @param devices     Device array; devices[0] is the build target.
/// @param oclVersion  Optional build options (e.g. "-cl-std=CL2.0"). Previously this
///                    argument was silently ignored. It is now passed to
///                    clBuildProgram; if that build fails, the build is retried
///                    without options, so every program that built before still builds.
///
/// The result of the last build attempt is stored in the `status` member.
void Program::BuildProgram(cl_program& program, const cl_uint& deviceCount, cl_device_id*& devices, const char* oclVersion)
{
    (void)deviceCount;

    if (devices == nullptr)
    {
        status = CL_INVALID_DEVICE;
        std::cout << "Program couldnt be built" << std::endl;
        std::cout << "Error: " << status << std::endl;
        return;
    }

    const bool hasOptions = (oclVersion != nullptr && oclVersion[0] != '\0');
    status = clBuildProgram(program, 1, devices, hasOptions ? oclVersion : nullptr, nullptr, nullptr);

    if (status != CL_SUCCESS && hasOptions)
    {
        // The device may not support the requested options; fall back to the defaults.
        std::cout << "Build with options \"" << oclVersion << "\" failed (" << status
                  << "), retrying without options" << std::endl;
        status = clBuildProgram(program, 1, devices, nullptr, nullptr, nullptr);
    }

    if (status != CL_SUCCESS)
    {
        std::cout << "Program couldnt be built" << std::endl;
        GetMessageError(status, program, devices[0]);
    }
}
// Host-side three-component float vector (x, y, z stored in that order).
struct Vector3
{
    float x;
    float y;
    float z;
};
using Vec3 = Vector3;
int main()
{
cl_uint numPlatf = 0;
Program p1;
p1.GetPlatform(p1.platform, numPlatf, p1.platforms);
p1.GetDeviceIDs(p1.devices, p1.platform);
p1.GetContext(p1.context, p1.platform, p1.devices);
p1.GetCommandQueue(p1.commandQueue, p1.context, p1.devices);
p1.CreateProgramWithSource(p1.filePath, p1.sourceCode, p1.sourceSize, p1.context, p1.program);
p1.BuildProgram(p1.program, 1, p1.devices, "-cl-std=CL2.0");
std::string s;
unsigned long long n = ULLONG_MAX;
unsigned long long m = 0;
unsigned long long length1 = 4;
unsigned long long length2 = 5;
unsigned long long resultlen = 5;
Vec3* v1 = (Vec3*)calloc(length1,sizeof(Vec3*) * length1);
Vec3* v2 = (Vec3*)calloc(length1, sizeof(Vec3*) * length2);
float* res = (float*)calloc(resultlen, sizeof(float*) * resultlen);
for (decltype(length1) i = 0; i < resultlen; i++)
{
if (resultlen > length1)
if (i < resultlen - 1)
{
v1[i].x = (float)(rand() % 100) + 1;
v1[i].y = (float)(rand() % 100) + 1;
v1[i].z = (float)(rand() % 100) + 1;
std::cout << i + 1 << ") vec1" << std::endl;
std::cout << "x:" << v1[i].x << " y:" << v1[i].y << " z:" << v1[i].z << std::endl;
}
v2[i].x= (float)(rand() % 100) + 1;
v2[i].y= (float)(rand() % 100) + 1;
v2[i].z = (float)(rand() % 100) + 1;
std::cout <<i+1 <<") vec2" << std::endl;
std::cout << "x:" << v2[i].x << " y:" << v2[i].y << " z:" << v2[i].z << std::endl;
res[i] = 0.0f;
std::cout << i + 1 << ") result: "<< res[i] << std::endl;
}
cl_mem vec1MemObject = clCreateBuffer(p1.context,CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,sizeof(Vec3*)*length1,&v1,&p1.status);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 0");
cl_mem vec2MemObject = clCreateBuffer(p1.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(Vec3*) * length2, &v2, &p1.status);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 1");
cl_mem resBuffer = clCreateBuffer(p1.context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(res) * resultlen, &res, &p1.status);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating Buffer 2");
cl_kernel dotProductKernel = clCreateKernel(p1.program,"CallDotProduct",&p1.status);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed Creating kernel 0");
p1.status = clSetKernelArg(dotProductKernel,0,sizeof(cl_mem),(void*)&vec1MemObject);
CheckErrorCode(p1.status,p1.program,p1.devices[0],"Failed to set Argument 0");
p1.status = clSetKernelArg(dotProductKernel, 1, sizeof(length1), &length1);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 1");
p1.status = clSetKernelArg(dotProductKernel, 2, sizeof(cl_mem), (void*)&vec2MemObject);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 2");
p1.status = clSetKernelArg(dotProductKernel, 3, sizeof(length2), &length2);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 3");
p1.status = clSetKernelArg(dotProductKernel, 4, sizeof(cl_mem), &resBuffer);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 4");
p1.status = clSetKernelArg(dotProductKernel, 5, sizeof(resultlen), &resultlen);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to set Argument 5");
size_t globalWorkSize[1] = { 1 };
size_t localWorkSize = 64;
p1.status = clEnqueueReadBuffer(p1.commandQueue, vec1MemObject, CL_FALSE, 0, sizeof(Vec3*), &v1, 0, NULL, NULL);
p1.status = clEnqueueReadBuffer(p1.commandQueue, vec2MemObject, CL_FALSE, 0, sizeof(Vec3*), &v2, 0, NULL, NULL);
p1.status = clEnqueueWriteBuffer(p1.commandQueue, resBuffer, CL_FALSE, 1, sizeof(res) * resultlen, &res, 0, NULL, NULL);
p1.status = clEnqueueNDRangeKernel(p1.commandQueue, dotProductKernel, 1, NULL, globalWorkSize, &localWorkSize, 0, NULL, NULL);
for (decltype(length1) i = 0; i < resultlen; i++)
{
if (resultlen > length1)
if (i < resultlen - 1)
{
std::cout <<i + 1 << ") vec1" << std::endl;
std::cout << "x:" << v1[i].x << " y:" << v1[i].y << " z:" << v1[i].z << std::endl;
}
std::cout << '*' << std::endl;
std::cout << i + 1 << ") vec2" << std::endl;
std::cout << "x:" << v2[i].x << " y:" << v2[i].y << " z:" << v2[i].z << std::endl;
std::cout << '=' << std::endl;
std::cout << res[i] << std::endl;
}
return 0;
}
头文件 ApplicationCL.h(源文件中以 "AppCL.h" 包含):
#ifndef APPLICATIONCL_H
#define APPLICATIONCL_H
#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <iterator>
#include <fstream>
#include <sstream>
#include <CL/cl.h>
/// @brief Bundles the OpenCL handles and helper routines needed to load, build and
///        run a kernel source file.
///
/// All data members are public and are passed by the caller back into the member
/// functions. The class releases nothing on destruction: the OpenCL handles and
/// the `devices` / `sourceCode` arrays must be released by the caller.
///
/// Every member now has an in-class initializer. Previously `fileBytes` was left
/// uninitialised and the constructor's initializer list was out of declaration order.
class Program
{
    friend int readfilec(const char* filePath,const char*& content, size_t& filesize);
    friend void CheckErrorCode(cl_int status, cl_program& program, cl_device_id& devices, const std::string& message);
    friend void GetMessageError(cl_int status, cl_program program, cl_device_id device);
public:
    cl_int status = 0;                        ///< Result of the most recent OpenCL call stored here.
    cl_platform_id platform = nullptr;        ///< Selected platform.
    cl_device_id* devices = nullptr;          ///< Device id array filled by GetDeviceIDs.
    cl_context context = nullptr;             ///< Context created by GetContext.
    cl_command_queue commandQueue = nullptr;  ///< Queue created by GetCommandQueue.
    cl_program program = nullptr;             ///< Program created by CreateProgramWithSource.
    size_t sourceSize[1] = { 0 };             ///< Length in bytes of the loaded kernel source.
    const char* sourceCode = nullptr;         ///< Kernel source buffer filled by CreateProgramWithSource.
    std::string strSource;
    const char* filePath = "kernel.cl";       ///< Path of the kernel source file.
    unsigned char* fileBytes = nullptr;
    cl_platform_id* platforms = nullptr;

    /// Sets the path of the kernel source file; the pointer is stored, not copied.
    inline void SetFilePath(const char* path) { filePath = path; }
    /// Returns the path of the kernel source file.
    inline const char* GetFilePath() const { return filePath; }

    /// Selects a platform and reports how many platforms exist.
    void GetPlatform(cl_platform_id& platform, cl_uint& numberPlatforms, cl_platform_id* platforms);
    /// Fills @p devices with the device ids of @p platform.
    void GetDeviceIDs(cl_device_id*& devices, const cl_platform_id& platform);
    /// Creates a context on @p platform for the given devices.
    void GetContext(cl_context& context, const cl_platform_id& platform, cl_device_id*& devices);
    /// Creates a command queue in @p context.
    void GetCommandQueue(cl_command_queue& commandQueue, const cl_context& context, cl_device_id*& devices);
    /// Loads the source file and creates a program object from it.
    template<size_t size>
    void CreateProgramWithSource(const char*& filePath, const char*& SourceStr, size_t(&sourceSize)[size], cl_context& context, cl_program& program);
    /// Builds @p program for the given devices.
    void BuildProgram(cl_program& program, const cl_uint& deviceCount, cl_device_id*& devices, const char* oclVersion);
    /// Reads @p filename into @p content; returns 0 on success, 1 otherwise.
    int ParseFile(std::string& content, const std::string& filename);

    Program() = default;
};
#endif // !APPLICATIONCL_H