我正在从 Matthew Scarpino 的“OpenCL in action”一书中学习 OpenCL 。第 3 章包含有关缓冲区(pp.45-47)和子缓冲区(pp.47-48)组织的材料。第 47 页上的用于创建子缓冲区的代码示例存在不准确之处,此处和此处的相同问题对此进行了披露。我更进一步并决定调查将存储在子缓冲区中的值传输回主机程序的情况。
我的主机程序正在将一个整数数组传输iaArray1[5] = { 1, 2, 3, 4, 5 }
到内核中。缓冲区memObjArray1
用于执行此操作。从iaArray1
数组ipaArray2
中获得数组,将值存储{ 3, 4, 5, 6, 7 }
为内核将数组的值与常数 2 相加。memObjArray2
输出缓冲区用于将ipaArray2
数组的值完全从设备传输到主机程序。接下来,memObjSubArray
从缓冲区形成子memObjArray2
缓冲区。正在尝试将数据从设备内存子缓冲区传输memObjSubArray
到主机程序。
我相信缓冲区和子缓冲区的数据从内核传输到主机程序的机制是相同的。为此,我使用了相同的函数clEnqueueReadBuffer()
,但程序给出了错误消息。我究竟做错了什么?应该使用什么函数将数据从设备内存子缓冲区传输到主机程序?
内核函数如下:
__kernel void good (global int* iaArray1, global int* iaArray2)
{
int i=get_global_id(0);
iaArray2[i]=iaArray1[i]+2;
}
这是我的程序的代码。所呈现的程序有几个简化。首先,简化了出口分支以缩短代码。其次,原始程序设计为使用多个 cl 文件,因此其中一些变量是数组。
#include <CL\cl.h>
#include <stdio.h>
#include <stdlib.h>
#define PROGRAM_FILE_1 "good.cl"
//#define PROGRAM_FILE_2 "bad.cl"
//#define PROGRAM_FILE_3 "setminusone.cl"
#define NUM_OF_FILES 1
int main(){
cl_platform_id *platforms;
cl_uint numOfPlatforms;
cl_int status;
cl_device_id *devices;
cl_uint numOfDevices;
char caDeviceName[500];
cl_context context;
const char * kcpaFileName[NUM_OF_FILES] = { PROGRAM_FILE_1};
FILE * pProgramHandler;
char * cpaProgramBuffer[NUM_OF_FILES];
size_t saProgramSize[NUM_OF_FILES] = { 0};
cl_uint numOfEnters[NUM_OF_FILES] = { 0};
cl_program program;
const char kcaOptions[] = "-cl-finite-math-only -cl-no-signed-zeros";
size_t sLogSize = 0;
char * cpProgramLog;
cl_uint numOfKernels = 0;
cl_kernel * kernels;
char caKernelName[20];
cl_command_queue cmdQueue0;
printf("Establishing number of available platforms... ");
status = clGetPlatformIDs(NULL, NULL, &numOfPlatforms);
if (status < 0){
printf("FAIL to establish platform(s)!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nEstablised %u platform(s).\nInitializing platform(s)... ", numOfPlatforms);
platforms = (cl_platform_id *)malloc(numOfPlatforms*sizeof(cl_platform_id));
status = clGetPlatformIDs(numOfPlatforms, platforms, NULL); //
if (status < 0){
printf("FAIL to initialize platform(s)!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nEstablishing devices... ");
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, NULL, NULL, &numOfDevices);
if (status < 0){
printf("FAIL to establish device(s)!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nEstablished %u device(s).\nInitializing device(s)... ", numOfDevices);
devices = (cl_device_id *)malloc(numOfDevices*sizeof(cl_device_id));
status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numOfDevices, devices, NULL);
if (status < 0){
printf("FAIL to initialize devices(s)!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.");
for (int i = 0; i < numOfDevices; i++){
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(caDeviceName), caDeviceName, NULL);
if (status < 0){
printf("FAIL to read device #%d name!> %d\n", i, status);
system("PAUSE");
exit(1);
}
printf("\nDevice #%d is \"%s\".", i, caDeviceName);
}
printf("\nCreating context... ");
context = clCreateContext(NULL, numOfDevices, devices, NULL, NULL, &status);
if (status < 0){
printf("FAIL to create context!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nReading source code from file(s)... ");
for (int i = 0; i < NUM_OF_FILES; i++){
pProgramHandler = fopen(kcpaFileName[i], "r");
if (pProgramHandler == NULL){
printf("FAIL to open file \"%s\"!> %d\n", kcpaFileName[i], status);
system("PAUSE");
exit(1);
}
fseek(pProgramHandler, 0, SEEK_END);
saProgramSize[i] = ftell(pProgramHandler);
rewind(pProgramHandler);
cpaProgramBuffer[i] = (char*)malloc(sizeof(char)*saProgramSize[i] + 1);
fread(cpaProgramBuffer[i], sizeof(char), saProgramSize[i], pProgramHandler);
cpaProgramBuffer[i][saProgramSize[i]] = '\0';
fclose(pProgramHandler);
for (int j = 0; j < saProgramSize[i]; j++){
if ((char)cpaProgramBuffer[i][j] == (char)10){
numOfEnters[i]++;
}
}
saProgramSize[i] = saProgramSize[i] - numOfEnters[i];
cpaProgramBuffer[i][saProgramSize[i]] = '\0';
}
printf("OK.\nCreating program from source code... ");
program = clCreateProgramWithSource(context, NUM_OF_FILES, (const char **)cpaProgramBuffer, (const size_t *)saProgramSize, &status);
if (status < 0){
printf("FAIL to create program!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nBuilding program... ");
status = clBuildProgram(program, 1, devices, kcaOptions, NULL, NULL);//,
if (status < 0){
printf("FAIL to build program.\n...Genetating log...");
for (int i = 0; i < NUM_OF_FILES; i++){
printf("\nCode from file \"%s\":\n%s", kcpaFileName[i], cpaProgramBuffer[i]);
}
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &sLogSize);
cpProgramLog = (char*)malloc(sizeof(char)*sLogSize + 1);
cpProgramLog[sLogSize] = '\0';
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, sLogSize + 1, cpProgramLog, NULL);
printf("\nLog length is %d.\nLog:\n%s\n> %d\n", sLogSize, cpProgramLog, status);
system("PAUSE");
exit(1);
}
printf("OK.\nDetermining number of kernels... ");
status = clCreateKernelsInProgram(program, NULL, NULL, &numOfKernels);
if (status < 0){
printf("FAIL to determine number of kernels!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nDetermined %d kernel(s):", numOfKernels);
kernels = (cl_kernel*)malloc(sizeof(cl_kernel)*numOfKernels);
clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);
for (int i = 0; i < numOfKernels; i++){
clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, sizeof(caKernelName), caKernelName, NULL);
printf("\nKernel \"%s\" indexed at %d.", caKernelName, i);
}
printf("\nCreating command queue... ");
cmdQueue0 = clCreateCommandQueue(context, devices[0], NULL, &status);
if (status < 0){
printf("FAIL to create command queue!> %d\n", status);
system("PAUSE");
exit(1);
}
/*Data, buffers and subbuffers*/
int iaArray1[5] = { 1, 2, 3, 4, 5 };
printf("\nPrinting out the initial array:\n");
for (int i = 0; i < 5; i++){
printf("%d ", iaArray1[i]);
}
printf("\nCreating buffers for kernels[0]... ");
int* ipaArray2 = (int*)malloc(5 * sizeof(int));
cl_mem memObjArray1 = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(iaArray1), &iaArray1, &status);
if (status < 0){
printf("\nFAIL to create memObjArray1 buffer!> %d \n", status);
system("PAUSE");
exit(1);
}
cl_mem memObjArray2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(iaArray1), NULL, &status);
if (status < 0){
printf("\nFAIL to create memObjArray2 buffer!> %d \n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nSetting arguments for kernels[0]... ");
status = clSetKernelArg(kernels[0], 0, sizeof(cl_mem), &memObjArray1);
if (status < 0){
printf("\nFAIL to set memObjArray1 argument at kernels[0]!> %d\n", status);
system("PAUSE");
exit(1);
}
status = clSetKernelArg(kernels[0], 1, sizeof(cl_mem), &memObjArray2);
if (status < 0){
printf("\nFAIL to set memObjArray2 argument at kernels[0]!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nExecuting kernels[0]... ");
size_t tGlobal_item_size = 5; //?
size_t tLocal_item_size = 1; //?
status = clEnqueueNDRangeKernel(cmdQueue0, kernels[0], 1, NULL, &tGlobal_item_size, &tLocal_item_size, 0, NULL, NULL);
if (status < 0){
printf("\nFAIL to enqueue kernels[0] into cmdQueue0!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nReading results from device memory buffer to host array... ");
status = clEnqueueReadBuffer(cmdQueue0, memObjArray2, CL_TRUE, 0, 5 * sizeof(int), ipaArray2, 0, NULL, NULL);
if (status < 0){
printf("FAIL to copy results from device to host!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nPrinting out the result array:\n");
for (int i = 0; i < 5; i++){
printf("%d ", ipaArray2[i]);
}
printf("\nCreating subbuffer... ");
int iQuantity = 2;
int iShift = 2;
typedef struct _cl_buffer_region{
size_t size;
size_t origin;
} cl_buffer_region;
cl_buffer_region stRegion;
stRegion.size = iQuantity * sizeof(int);
stRegion.origin = iShift * sizeof(int);
cl_mem memObjSubArray = clCreateSubBuffer(memObjArray2, CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, &stRegion, &status);
if (status < 0){
printf("FAIL to create subbuffer!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nReading results from device memory subbuffer to host array... ");
int* ipaSubArray = (int*)malloc(iQuantity*sizeof(int));
status = clEnqueueReadBuffer(cmdQueue0, memObjSubArray, CL_TRUE, 0, iQuantity*sizeof(int), ipaSubArray, 0, NULL, NULL);
if (status < 0){
printf("FAIL to copy results from device to host!> %d\n", status);
system("PAUSE");
exit(1);
}
printf("OK.\nPrinting out the result array:\n");
for (int i = 0; i < iQuantity; i++){
printf("%d ", ipaSubArray[i]);
}
printf("OK.\n...Releasing resources... ");
clReleaseMemObject(memObjArray1);
clReleaseMemObject(memObjArray2);
clReleaseMemObject(memObjSubArray);
clReleaseCommandQueue(cmdQueue0);
free(kernels);
clReleaseProgram(program);
for (int i = 0; i < NUM_OF_FILES; i++){
free(cpaProgramBuffer[i]);
}
clReleaseContext(context);
free(devices);
free(platforms);
printf("OK.\nEnd of program. Bey!\n");
system("PAUSE");
}