下午好!我需要运行一个用 C++ 编写、使用 OpenCL 的程序。在此之前,我已经引入了 OpenCL 头文件,安装了 CUDA(这一步是可选的),并重新安装了 Visual Studio 和 MinGW。我的显卡是 NVIDIA GeForce GTX 1080。
我有一个 array<int, 3> loc。按照 kernelFile2.cl,程序执行后 loc 应该变为 15、15、15(内核参数 found 对应主程序中的 loc,请参见下面的主代码):
// Every work-item stores the same marker value (15) into the first three
// elements of `found`. The other arguments are accepted but not read here.
kernel void kernelFile(global int *faces, global int *facecount, global int *found, global int *xlength, global int *zlength, global int *ymin)
{
    for (int slot = 0; slot < 3; ++slot) {
        found[slot] = 15;
    }
}
抱歉,我对 OpenCL 不太了解,所以我将向您展示整个程序。
文件.cpp:
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <array>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

#include <CL/cl.hpp>
int main()
{
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
_ASSERT(platforms.size() > 0);
auto platform = platforms.front();
std::vector<cl::Device> devices;
platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
_ASSERT(devices.size() > 0);
auto device = devices.front();
std::cout << "Device: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
std::string fileName;
std::ifstream kernelFile("kernelFile2.cl");
std::string src(std::istreambuf_iterator<char>(kernelFile), (std::istreambuf_iterator<char>()));
cl::Program::Sources sources(1, std::make_pair(src.c_str(), src.length() + 1));
cl::Context context(device);
cl::Program program(context, sources);
auto err = program.build("-cl-std=CL1.2");
std::cout << std::to_string(err) << " error" <<std::endl;//print error code
int facecount = 1;
const int arraysize = 5;//5 times facecount
std::array<int, arraysize> formation = {{0, 0, 0, 1, 3,}};
std::array<int, 3> loc = {0, 0, 0} ;
int xlength = 10000;
int zlength = 10000;
int yrangesize = 1;
int ymin = 59;
cl::Buffer facesbuf(context, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * arraysize, formation.data());
cl::Buffer facecountbuf(context, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &facecount);
cl::Buffer resultbuf(context, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * 3, loc.data());
cl::Buffer xlengthbuf(context, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &xlength);
cl::Buffer zlengthbuf(context, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &zlength);
cl::Buffer yminbuf(context, CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &ymin);
cl::Kernel kernel(program, "kernelFile");
kernel.setArg(0, facesbuf);
kernel.setArg(1, facecountbuf);
kernel.setArg(2, resultbuf);
kernel.setArg(3, xlengthbuf);
kernel.setArg(4, zlengthbuf);
kernel.setArg(5, yminbuf);
cl::CommandQueue queue(context, device);
std::cout << std::to_string(queue.enqueueNDRangeKernel(kernel, cl::NDRange(NULL), cl::NDRange(xlength, yrangesize, zlength))) << std::endl;
queue.enqueueReadBuffer(resultbuf, CL_TRUE, 0, sizeof(int) * 3, loc.data());
queue.finish();
std::cout << "X: " << loc[0] << " Y: " << loc[1] << " Z: " << loc[2];
}
但遗憾的是,程序运行之后 loc 的值并没有改变。
原来的程序与此不同,我对它做了精简,只保留了有助于理解问题的部分。
如果有人能帮忙,非常感谢!