我有这段代码(从 JOCL 示例修改),其中 CL 内核采用 float 和 char[] 指针:
import static org.jocl.CL.*;
import org.jocl.*;
public class JOCLEncode {
private static String kernelSource =
"__kernel void "+
"sampleKernel(__global float *c, __global char[] *t)"+
"{"+
" int gid = get_global_id(0);"+
" c[gid] = c[gid] + 4;"+
"}";
public static void main(String[] a) {
float dstArray[] = new float[5];
char charArray[] = new char[2];
Pointer charsPointer = Pointer.to(charArray);
Pointer dstPointer = Pointer.to(dstArray);
final int platformIndex = 0;
final long deviceType = CL_DEVICE_TYPE_ALL;
final int deviceIndex = 0;
CL.setExceptionsEnabled(true);
int numPlatformsArray[] = new int[1];
clGetPlatformIDs(0, null, numPlatformsArray);
int numPlatforms = numPlatformsArray[0];
System.out.println("Num devices available: " + numPlatformsArray[0]);
cl_platform_id platforms[] = new cl_platform_id[numPlatforms];
clGetPlatformIDs(platforms.length, platforms, null);
cl_platform_id platform = platforms[platformIndex];
cl_context_properties contextProperties = new cl_context_properties();
contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform);
int numDevicesArray[] = new int[1];
clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray);
int numDevices = numDevicesArray[0];
cl_device_id devices[] = new cl_device_id[numDevices];
clGetDeviceIDs(platform, deviceType, numDevices, devices, null);
cl_device_id device = devices[deviceIndex];
cl_context context = clCreateContext(contextProperties, 1, new cl_device_id[]{device}, null, null, null);
cl_command_queue commandQueue = clCreateCommandQueue(context, device, 0, null);
cl_mem memObjects[] = new cl_mem[2];
memObjects[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, Sizeof.cl_float * 5, null, null);
System.out.println("Created shared memory");
memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Sizeof.cl_float * 5, charsPointer, null);
//make and build program
String[] kernelSrc = {kernelSource};
cl_program program = clCreateProgramWithSource(context, 1, kernelSrc, null, null);
clBuildProgram(program, 0, null, null, null, null);
cl_kernel kernel = clCreateKernel(program, "sampleKernel", null);
clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(memObjects[0]));
clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(memObjects[1]));
//i think this is how many times it runs
long global_work_size[] = {5};
long local_work_size[] = {1};
//execution ?
clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, global_work_size, local_work_size, 0, null, null);
//i think it puts the finished vals in dst
clEnqueueReadBuffer(commandQueue, memObjects[0], CL_TRUE, 0, 5 * Sizeof.cl_float, dstPointer, 0, null, null);
clReleaseMemObject(memObjects[0]);
clReleaseMemObject(memObjects[1]);
clReleaseKernel(kernel);
clReleaseProgram(program);
clReleaseCommandQueue(commandQueue);
clReleaseContext(context);
System.out.println("Result: " + java.util.Arrays.toString(dstArray));
}
}
我认为问题出在内核中,特别是 . char[]
,但我无法弄清楚。是它,还是它在构建它?