我正在使用 OpenCL 并行化 LBM,遇到了一个问题:当自定义数据类型用作内核参数时,内核头文件是如何生成的。
我按照需要在内核文件 rebound.cl 中定义了 typedef struct {...} t_speed;
但生成的内核头文件 rebound.cl.h 中对应的 t_speed 结构体定义(见下文的 double [5] speeds;)不是合法的 C 语法。
(虽然这只是个烦人的小麻烦,而不是什么大问题,但修复它会节省大量时间!)
内核文件: rebound.cl
/*
 * Enable double-precision support before any `double` is used in this file.
 * Checks the cl_khr_fp64 macro first, then cl_amd_fp64, and enables the
 * matching extension via the OPENCL EXTENSION pragma.
 */
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#elif defined(cl_amd_fp64)
/* Fallback used only when cl_khr_fp64 is not defined. */
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#else
/* Neither macro is defined: stop compilation with an explicit diagnostic. */
#error "Double precision floating point not supported by OpenCL implementation."
#endif
/* Number of speed components stored in each cell. */
#define NSPEEDS 5

/*
 * One cell of the grid: NSPEEDS double-precision speed values.
 * The kernel below indexes entries 1..4 of `speeds`.
 */
typedef struct {
    double speeds[NSPEEDS];
} t_speed;
/*
 * rebound: for each work-item whose index is below `count` and whose
 * `obstacles` entry is non-zero, copy speeds from tmp_cells into cells with
 * components 1<->3 and 2<->4 exchanged. Component 0 is not touched.
 *
 * cells      destination cells (written)
 * tmp_cells  source cells (read)
 * obstacles  per-cell flag; non-zero selects the cell for the exchange
 * count      upper bound on the work-item index that is processed
 *            (presumably the number of cells -- confirm against the host code)
 */
__kernel void rebound (__global t_speed* cells,
                       __global t_speed* tmp_cells,
                       __global const unsigned char* obstacles,
                       const unsigned short int count)
{
    const int gid = get_global_id(0);

    /* Out-of-range work-items and non-obstacle cells have nothing to do.
       The range check comes first so obstacles[] is never read past count. */
    if (gid >= count || !obstacles[gid]) {
        return;
    }

    __global t_speed* dst = &cells[gid];
    __global const t_speed* src = &tmp_cells[gid];

    /* Direction labels are carried over from the original comments. */
    dst->speeds[1] = src->speeds[3];   /* East -> West */
    dst->speeds[3] = src->speeds[1];   /* West -> East */
    dst->speeds[2] = src->speeds[4];   /* North -> South */
    dst->speeds[4] = src->speeds[2];   /* South -> North */
}
内核头文件: rebound.cl.h
/***** GCL Generated File *********************/
/* Automatically generated file, do not edit! */
/**********************************************/
#include <OpenCL/opencl.h>
/*
 * Host-side counterpart of the kernel's t_speed, as emitted by the generator.
 * NOTE(review): `double [5] speeds;` is not valid C -- the array bound must
 * follow the member name (`double speeds[5];`), as in the kernel file's
 * definition. The line is left exactly as generated.
 */
typedef struct {
double [5] speeds;
} _t_speed_unalign;
/* t_speed is the 8-byte-aligned variant of the struct above. */
typedef _t_speed_unalign __attribute__ ((aligned(8))) t_speed;
/*
 * Block pointer (clang `^` syntax) for invoking the rebound kernel from host
 * code; declared extern here, so the definition lives elsewhere. The
 * parameters mirror the kernel's arguments, preceded by the cl_ndrange that
 * describes the launch.
 */
extern void (^rebound_kernel)(const cl_ndrange *ndrange, t_speed* cells, t_speed* tmp_cells, cl_uchar* obstacles, cl_ushort count);