
My kernel function has to store an intermediate private array of 7500 values. When I run the code, my screen goes blank for about a second, and when it comes back no results are displayed. Assuming there was not enough private memory, I changed the code slightly: it now compares each newly computed value against the largest value found so far, so I no longer need an array of 7000 values and only keep the largest values. But I still run into the same problem. What could be causing my screen to go black? Here is my kernel:

__kernel void sampleKernel(
    const uint trgr,
    const uint trgc,
    __global const float *TRG,
    __global const float *ENT,
    __global float *RES1,
    __global float *RES2)
{
    int pred, tars, preds;
    float big1, big2;
    float g1 = 0, g2 = 0;
    float tol = 0.5f, val = 0.0f;
    int i  =  get_global_id(0);
    for(pred = 0; pred<trgr; pred++)
    {
        val = 0.0f;
        for(tars = 0; tars<trgc; tars++)
        {
            for(preds = 0; preds<trgc; preds++)
            {
                if(TRG[pred*trgc+preds] > (TRG[pred*trgc+tars]-tol) && TRG[pred*trgc+preds]>(TRG[pred*trgc+tars]+tol) )
                    val = val+1;
            }
        }

        val = ENT[pred]*val;
        if(pred == 0) 
        {
            big1 = val;
            g1 = pred;
        }
        else if(pred == 1)
        {
            if(val>big1)
            {
                big2 = big1;
                g2 = g1;
                big1 = val;
                g1 = pred;
            }
        }
        else
        {
            if(val>big1)
            {
                big2 = big1;
                g2 = g1;
                big1 = val;
                g1 = pred;
            }
            else if(val>big2)
            {
                big2 = val;
                g2 = pred;
            }
        }
    }
    RES1[i] = g1;
    RES2[i] = g2; 
}
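
To make it clearer what the kernel is supposed to compute, here is the same logic written as plain Java (purely illustrative and slightly condensed; the helper name topTwoOnHost and the explicit zero-initialization of big1/big2 are only for this sketch, the kernel itself declares them without an initial value):

static float[] topTwoOnHost(float[] TRG, float[] ENT, int trgr, int trgc) {
    float tol = 0.5f;
    float big1 = 0.0f, big2 = 0.0f;  // the kernel leaves these uninitialized
    float g1 = 0.0f, g2 = 0.0f;
    for (int pred = 0; pred < trgr; pred++) {
        // count how many (tars, preds) pairs in row pred satisfy the tolerance condition
        float val = 0.0f;
        for (int tars = 0; tars < trgc; tars++) {
            for (int preds = 0; preds < trgc; preds++) {
                float a = TRG[pred * trgc + preds];
                float b = TRG[pred * trgc + tars];
                if (a > b - tol && a > b + tol)
                    val = val + 1;
            }
        }
        val = ENT[pred] * val;          // weight the count for this row
        if (pred == 0 || val > big1) {  // new best value: previous best becomes runner-up
            big2 = big1;
            g2 = g1;
            big1 = val;
            g1 = pred;
        } else if (pred > 1 && val > big2) {  // new runner-up (only from the third row on, as in the kernel)
            big2 = val;
            g2 = pred;
        }
    }
    // the kernel writes these into RES1[i] and RES2[i]
    return new float[]{ g1, g2 };
}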

The OpenCL kernel above is stored in a private static String programSource field. Here is the host code:

 //setting the kernel arguments
 clSetKernelArg(kernel, 0, Sizeof.cl_uint, Pointer.to(new int[]{7000}));
 clSetKernelArg(kernel, 1, Sizeof.cl_uint, Pointer.to(new int[]{36}));
 clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[0]));
 clSetKernelArg(kernel, 3, Sizeof.cl_mem, Pointer.to(memObjects[1]));  
 clSetKernelArg(kernel, 4, Sizeof.cl_mem, Pointer.to(memObjects[2]));
 clSetKernelArg(kernel, 5, Sizeof.cl_mem, Pointer.to(memObjects[3]));
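
The launch itself is not included above; between setting the arguments and reading the buffers back I enqueue the kernel roughly like this (the global work size of 1 below is only a placeholder, not my real configuration):

 //launching the kernel (the work size shown here is a placeholder)
 long[] global_work_size = new long[]{1};
 clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, global_work_size, null,
         0, null, null);
 clFinish(commandQueue);  //wait until the kernel has finished before reading the results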

 //reading from GPU
 clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, m * n * Sizeof.cl_float, pres1, 0, null, null);
 clEnqueueReadBuffer(commandQueue, memObjects[3], CL_TRUE, 0, m * n * Sizeof.cl_float, pres2, 0, null, null);
