我第一次使用 OpenCL,正在按照这个网站 http://www.karlosp.net/blog/2012/05/03/opencl-opencv-sobel-edge-detector/ 上的示例实现 Sobel 边缘检测。但是在 GPU 上运行内核时,帧率低于 15 fps,GPU 利用率不到 5%。怎样才能像 OpenMP 那样让 GPU 的所有线程都运行起来,使利用率达到 95% 以上?
代码
内核代码:
/*
 * Sobel gradient-magnitude kernel: one work-item per pixel.
 *
 * A      : input image, indexed row-major as A[y * width + x].
 * R      : output image, same indexing; receives sqrt(Gx^2 + Gy^2).
 * width  : number of pixels per row.
 * height : number of rows.
 *
 * Work-item (get_global_id(0), get_global_id(1)) handles pixel (x, y).
 * Pixels on the outermost rows and columns have no complete 3x3
 * neighbourhood and are written as 0. Work-items that fall outside the
 * image do nothing.
 *
 * NOTE(review): assumes A and R each hold at least width * height floats,
 * and that width * height fits in a uint -- confirm against the host code.
 */
__kernel void sobel(__global float *A, __global float *R, uint width, uint height) {
    const uint x = (uint)get_global_id(0);
    const uint y = (uint)get_global_id(1);

    // The global work size may be larger than the image (e.g. rounded up to
    // a multiple of the work-group size). Such work-items must not touch
    // memory: with a row-major index they would alias another row's pixel
    // or run past the end of the buffers.
    if (x >= width || y >= height) {
        return;
    }

    const uint index = y * width + x;

    // Border pixels: the 3x3 window would read outside the image (or wrap
    // into the neighbouring row), so emit a zero gradient instead.
    if (x == 0 || y == 0 || x == width - 1 || y == height - 1) {
        R[index] = 0.0f;
        return;
    }

    // Load the eight neighbours that carry a non-zero Sobel weight.
    // The centre pixel has weight 0 in both masks and is not needed.
    const float nw = A[index - width - 1];
    const float n  = A[index - width];
    const float ne = A[index - width + 1];
    const float w  = A[index - 1];
    const float e  = A[index + 1];
    const float sw = A[index + width - 1];
    const float s  = A[index + width];
    const float se = A[index + width + 1];

    // Horizontal mask:  [-1 0 +1; -2 0 +2; -1 0 +1]
    const float sobelX = (ne - nw) + 2.0f * (e - w) + (se - sw);
    // Vertical mask:    [-1 -2 -1;  0 0 0; +1 +2 +1]
    const float sobelY = (sw - nw) + 2.0f * (s - n) + (se - ne);

    // Plain multiplication instead of pow(v, 2): same value, far cheaper.
    R[index] = sqrt(sobelX * sobelX + sobelY * sobelY);
}