我为一个 OpenCL 程序编写了下面这个 Mandelbrot 内核。为了测试，我决定把整个复平面都放在一个向量（一维数组）里。我的问题是：打印输出时，得到的是一串 1（也就是结果数组初始化时的值），而不是内核计算出的结果。
我哪里做错了？
#include <iostream>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
int main(){
using namespace std;
int xPixel=100;
int yPixel=100;
float ics[xPixel];
for(int i=0;i<xPixel;++i)
ics[i]=-2+i*((float)4/xPixel);
float ypsilon[yPixel];
for(int i=0;i<yPixel;++i)
ypsilon[i]=-2+i*((float)4/yPixel);
int results[xPixel*yPixel];
for(int i=0;i<xPixel*yPixel;++i)
results[i]=1;
cl_context context;
cl_context_properties properties[3];
cl_kernel kernel;
cl_command_queue command_queue;
cl_program program;
cl_int err;
cl_uint num_of_platforms=0;
cl_platform_id platform_id;
cl_device_id device_id;
cl_uint num_of_devices=0;
cl_mem memX, memY, memOutput;
size_t global;
const char *KernelSource =
"__kernel void mandelbrot(__global float *ics, __global float *ypsilon, __global int *output){\n"\
"size_t id=get_global_id(0);\n"\
"int yPixel=100;\n"\
"for(int i=0;i<yPixel;i++){\n"\
"float x=0;\n"\
"float y=0;\n"\
"int counter=0;\n"\
"while(counter<1000){\n"\
"if(x*x+y*y>2*2){\n"\
"output[(id*yPixel)+i]=counter;\n"\
"break;\n"\
"}\n"\
"float xTemp=x*x-y*y+ics[id];\n"\
"y=2*x*y+ypsilon[i];\n"\
"x=xTemp;\n"\
"counter++;\n"\
"}\n"\
"}\n"\
"}\n";
// retreives a list of platforms available
if (clGetPlatformIDs(1, &platform_id, &num_of_platforms)!= CL_SUCCESS){
cout<<"Unable to get platform_id\n"<<endl;;
return 1;
}
// try to get a supported GPU device
if (clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id,&num_of_devices) != CL_SUCCESS){
cout<<"Unable to get device_id\n"<<endl;
return 1;
}
//context properties list - nust be terminated with 0
properties[0]=CL_CONTEXT_PLATFORM;
properties[1]=(cl_context_properties)platform_id;
properties[2]=0;
//create a context with the GPU device
context=clCreateContext(properties,1,&device_id,NULL,NULL,&err);
//create a command queue using the context and device
command_queue=clCreateCommandQueue(context,device_id,0,&err);
//create a program from the kernel source code
program=clCreateProgramWithSource(context,1,(const char**)&KernelSource,NULL,&err);
//compile the program
if(clBuildProgram(program,0,NULL,NULL,NULL,NULL)!=CL_SUCCESS){
cout<<"Error building program"<<endl;
return 1;
}
//specify which kernel from the program to execute
kernel=clCreateKernel(program,"mandelbrot",&err);
//create buffers for input and output
memX=clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float)*xPixel,NULL,NULL);
memY=clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float)*yPixel,NULL,NULL);
memOutput=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(int)*(xPixel*yPixel),NULL,NULL);
//load data into the input buffer
clEnqueueWriteBuffer(command_queue,memX,CL_TRUE,0,sizeof(float)*xPixel,ics,0,NULL,NULL);
clEnqueueWriteBuffer(command_queue,memY,CL_TRUE,0,sizeof(float)*yPixel,ypsilon,0,NULL,NULL);
//set the argument list for the kernel command
clSetKernelArg(kernel,0,sizeof(cl_mem),&memX);
clSetKernelArg(kernel,1,sizeof(cl_mem),&memY);
clSetKernelArg(kernel,2,sizeof(cl_mem),&memOutput);
global=xPixel*yPixel;
//enqueue the kernel command for execution
clEnqueueNDRangeKernel(command_queue,kernel,1,NULL,&global,NULL,0,NULL,NULL);
clFinish(command_queue);
//copy the results from out of the output buffer
clEnqueueReadBuffer(command_queue,memOutput,CL_TRUE,0,sizeof(int)*(xPixel*yPixel),results,0,NULL,NULL);
//print output
for(int i=0;i<xPixel;++i){
for(int j=0;j<yPixel;++j){
cout<<results[(i*yPixel)+j]<<" ";
}
cout<<endl;
}
//cleanup - release OpenCL resources
clReleaseMemObject(memX);
clReleaseMemObject(memY);
clReleaseMemObject(memOutput);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);
}