0

为了加快程序中的一些数学例程,我希望尝试将一些数组除法运算转移到 GPU 上。我的开发环境是 Linux(Arch),使用 MonoDevelop 和 OpenTK 库,后者只是 OpenCL C 库的 P/Invoke 包装器。

具体来说,我能够完成所有启动、构建内核而不会出现错误或日志中报告的任何内容、设置内存缓冲区并设置内核参数。当我尝试使用“clEnqueueTask”方法对任务进行排队时,就会出现问题,此时程序出现段错误。

此时我不确定该如何调试程序,因为没有抛出任何异常(而对于缺少 dll 这类更普通的错误,它一直是会抛出异常的),所以我很茫然,希望能得到一些建议。

非常感谢!

这是代码...

  // OpenCL C source: each work-item divides one element of v1 by the single
  // value stored in v2[0], writing the result back into v1.
  string vecDivision = @"__kernel void floatDivision(__global float * v1, __global float * v2){
                            // Vector element index
                            int i = get_global_id(0);
                            v1[i] = v1[i] / v2[0];
                          }";

  // Host side: pick a platform, create a GPU context and command queue, wrap
  // the host arrays in buffers, build the kernel, bind its arguments, enqueue
  // it and wait for completion. Every OpenCL status code is checked and turned
  // into an exception so failures surface in managed code.
  try{
    fixed(float* srcA = &data[0]){

      ErrorCode err = ErrorCode.Success;
      IntPtr deviceId = IntPtr.Zero;

      int numDevices = -1;

      // get the platform to create the context from
      IntPtr[] platforms = new IntPtr[3];
      uint[] numPlatforms = new uint[3];
      err = (ErrorCode)CL.GetPlatformIDs(3, platforms, numPlatforms);
      // Without this check a missing driver would only show up later as an
      // obscure failure on a zero platform handle.
      if(err != ErrorCode.Success || numPlatforms[0] == 0 || platforms[0] == IntPtr.Zero){
        throw new Exception("Could not get a CL platform (" + err + ")");
      }

      // create the context
      IntPtr[] properties = new IntPtr[]{new IntPtr((int)ContextProperties.ContextPlatform), platforms[0], IntPtr.Zero};
      // One-element array so the native call has somewhere to store its status
      // (a zero-length array can never report an error back).
      ErrorCode[] contextErr = new ErrorCode[1];
      IntPtr context = CL.CreateContextFromType(properties, DeviceTypeFlags.DeviceTypeGpu, IntPtr.Zero, IntPtr.Zero, contextErr);
      if(context == IntPtr.Zero || contextErr[0] != ErrorCode.Success){
        throw new Exception("Count not create CL Context");
      }

      // get the device id
      // Ask for the same device type the context was created with; the
      // platform's "default" device is not guaranteed to be a GPU and a device
      // outside the context cannot be used for the command queue.
      err = (ErrorCode)CL.GetDeviceIDs(platforms[0],DeviceTypeFlags.DeviceTypeGpu, 1, ref deviceId, ref numDevices);
      if(err != ErrorCode.Success){
        throw new Exception("Count not get the CL device");
      }

      // create the command queue
      IntPtr queue = CL.CreateCommandQueue(context, deviceId, 0, out err);
      if(err != ErrorCode.Success){
        throw new Exception("Count not create CL command queue");
      }

      // create the memory buffers
      // NOTE(review): with MemUseHostPtr the host memory must stay pinned and
      // valid for the buffer's whole lifetime, and results must be synchronised
      // back (map/unmap or a read). The original author reported that switching
      // to copying the data into device memory resolved the crash - confirm.
      IntPtr ptrA = CL.CreateBuffer(context, MemFlags.MemReadWrite | MemFlags.MemUseHostPtr, new IntPtr(sizeof(float)*length), (IntPtr)srcA   , out err);
      if(err != ErrorCode.Success){
        throw new Exception("Count not create CL buffer");
      }
      // NOTE(review): `divisor` is declared outside this snippet; this cast
      // presumes it is already a pinned pointer to one float - verify.
      IntPtr ptrB = CL.CreateBuffer(context, MemFlags.MemReadOnly | MemFlags.MemUseHostPtr, new IntPtr(sizeof(float))       , (IntPtr)divisor, out err);
      if(err != ErrorCode.Success){
        throw new Exception("Count not create CL buffer");
      }

      // create the program using the source
      // A zero length tells OpenCL the source string is null-terminated.
      IntPtr lengths = IntPtr.Zero;
      IntPtr program = CL.CreateProgramWithSource(context, 1,new string[]{vecDivision}, ref lengths, out err);
      if(err != ErrorCode.Success){
        throw new Exception("Count not create CL program from source");
      }

      // build the program from source
      // The status is held back until the build log has been fetched, because
      // the log is most useful precisely when the build fails.
      ErrorCode buildErr = (ErrorCode)CL.BuildProgram(program, 1, new IntPtr[]{deviceId}, "", IntPtr.Zero, IntPtr.Zero);

      // get the build log
      // First ask for the log size, then read it into a byte buffer: OpenCL
      // returns single-byte characters, so a UTF-16 char[] would be garbled.
      string buildLog = string.Empty;
      IntPtr returnSize = IntPtr.Zero;
      err = (ErrorCode)CL.GetProgramBuildInfo(program, deviceId, ProgramBuildInfo.ProgramBuildLog, IntPtr.Zero, IntPtr.Zero, out returnSize);
      if(err == ErrorCode.Success && (int)returnSize > 0){
        byte[] log = new byte[(int)returnSize];
        fixed(byte* logp = &log[0]){
          err = (ErrorCode)CL.GetProgramBuildInfo(program, deviceId, ProgramBuildInfo.ProgramBuildLog, new IntPtr(log.Length), new IntPtr(logp), out returnSize);
        }
        if(err == ErrorCode.Success){
          // The reported size includes the trailing NUL terminator.
          buildLog = System.Text.Encoding.ASCII.GetString(log).TrimEnd('\0');
        }
      }
      MsgHandling.LogMessage(buildLog, string.Empty);
      if(buildErr != ErrorCode.Success){
        throw new Exception("Count not build the CL program");
      }
      if(err != ErrorCode.Success){
        throw new Exception("Could not get the CL build log (" + err + ")");
      }

      // create the kernel object
      IntPtr kernel = CL.CreateKernel(program, "floatDivision", out err);
      if(err != ErrorCode.Success){
        throw new Exception("Count not create CL kernel");
      }

      // set the arguments for the program
      // Each argument is a cl_mem handle, so the size passed is that of the
      // handle (IntPtr) and the value is the address of the handle variable.
      err = (ErrorCode)CL.SetKernelArg(kernel, 0, new IntPtr(IntPtr.Size), new IntPtr(&ptrA));
      if(err != ErrorCode.Success){
        throw new Exception("Count not set CL first argument");
      }
      err = (ErrorCode)CL.SetKernelArg(kernel, 1, new IntPtr(IntPtr.Size), new IntPtr(&ptrB));
      if(err != ErrorCode.Success){
        throw new Exception("Count not set CL second argument");
      }

      // queue up the worker
      IntPtr globalWorkOffset = IntPtr.Zero;
      IntPtr globalWorkSize = new IntPtr(length);
      IntPtr localWorkSize = new IntPtr(64);
      IntPtr eventWaitList = IntPtr.Zero;
      IntPtr eventItem = IntPtr.Zero;
      // NOTE(review): an explicit local size of 64 requires `length` to be a
      // multiple of 64; passing no local size lets the driver choose one.
      //err = (ErrorCode)CL.EnqueueNDRangeKernel(queue, kernel, 1, null, new IntPtr[]{globalWorkSize}, new IntPtr[]{localWorkSize}, 0, null, new IntPtr[]{eventItem});

      // This is the call the original author reported as crashing.
      // NOTE(review): per the OpenCL specification clEnqueueTask runs the
      // kernel as a single work-item, so only v1[0] would be divided; the
      // NDRange form above is presumably what the full-array division needs.
      err = (ErrorCode)CL.EnqueueTask(queue, kernel, 0,null, new IntPtr[]{eventItem});

      if(err != ErrorCode.Success){
        throw new Exception("Count not queue CL job");
      }

      // wait for it to finish
      err = (ErrorCode)CL.Finish(queue);
      if(err != ErrorCode.Success){
        throw new Exception("Count not wait for CL job to finish");
      }

      // NOTE(review): no release calls for the kernel, program, buffers, queue
      // or context are visible in this snippet - confirm they are released
      // elsewhere, otherwise these native handles leak.
    }

更新:

我弄明白了。问题出在我创建缓冲区的方式上。显然,如果使用 MemUseHostPtr 选项,就必须使用另一种方法来排队读取缓冲区,因此我改用了将内存直接复制到 GPU 内存的选项,从长远来看,这可能是更好的选择。

4

1 回答 1

1

我弄明白了。问题出在我创建缓冲区的方式上。显然,如果使用 MemUseHostPtr 选项,就必须使用另一种方法来排队读取缓冲区,因此我改用了将内存直接复制到 GPU 内存的选项,从长远来看,这可能是更好的选择。

于 2012-05-04T20:34:19.293 回答