2

我的程序使用了 CUDA 基数排序(radix sort)类。从 CUDA 4.0 升级到 4.2 之后,该类的辅助初始化函数会崩溃,并提示“围绕变量 'devprop' 的堆栈已损坏”。通过逐步注释掉函数中的部分代码,我已经把问题定位到 cudaGetDeviceProperties:正是这个调用破坏了 devprop 周围的栈。但我不明白为什么会这样,也不知道该如何解决。我的环境是 CUDA 4.2、开发驱动程序 301.32、Nsight 2.2、Windows 7 64 位,编译目标为 Win32。下面的代码段包含发生崩溃的辅助函数 initDeviceParameters():

namespace nvRadixSort
{
#include "radixsort.h"
#include "cudpp/cudpp.h"
#include <stdio.h>
#include <assert.h>

// Tuning flag written by initDeviceParameters(): set to true when the active
// device's SM version (major * 10 + minor) is below 12. Defaults to false.
bool bManualCoalesce = false;
// Tuning flag written by initDeviceParameters(): set to true when the active
// device's SM version (major * 10 + minor) is below 20. Defaults to false.
bool bUsePersistentCTAs = false;

/**
 * Selects the radix-sort tuning flags for the currently active CUDA device.
 *
 * Queries the active device's compute capability and updates the file-level
 * flags bManualCoalesce and bUsePersistentCTAs accordingly. If either the
 * device lookup or the property query fails, both flags are left untouched
 * instead of being derived from an uninitialized cudaDeviceProp.
 *
 * NOTE(review): cudaGetDeviceProperties fills the whole cudaDeviceProp
 * structure, so the headers this file is compiled against must come from the
 * same toolkit version as the CUDA runtime that is linked; a mismatch is the
 * presumed cause of the "stack around 'devprop' was corrupted" failure.
 *
 * @param keysOnly  true when only keys (no values) are sorted; it is not
 *                  consulted by the code visible in this excerpt.
 */
void initDeviceParameters(bool keysOnly)
{
    int deviceID = -1;
    if (cudaSuccess != cudaGetDevice(&deviceID))
    {
        return;
    }

    cudaDeviceProp devprop;
    if (cudaSuccess != cudaGetDeviceProperties(&devprop, deviceID))
    {
        // devprop holds no valid data here; keep the current flag values.
        return;
    }

    // Encode the compute capability as a two-digit number, e.g. 2.1 -> 21.
    int smVersion = devprop.major * 10 + devprop.minor;

    // sm_12 and later devices don't need help with coalesce in reorderData kernel
    bManualCoalesce = (smVersion < 12);
    bUsePersistentCTAs = (smVersion < 20);

    if (bUsePersistentCTAs)
    {
        //Irrelevant. My setup is 2.1
    }
}
}

这是相关的类代码:

#include <cuda_runtime_api.h>
#include "cudpp/cudpp.h"

namespace nvRadixSort
{

class RadixSort
{
    public:

    /**
     * Builds a sorter and allocates its temporary storage.
     *
     * Every handle and pointer member is zeroed first (including
     * mCudppContext, which is otherwise only written by cudppCreate), then
     * initialize() performs the actual setup.
     *
     * @param maxElements  number of elements the temporary storage is sized for.
     * @param keysOnly     when true, no temporary value buffer is allocated.
     */
    RadixSort(unsigned int maxElements, bool keysOnly = false)
        : mCudppContext(0),
          mScanPlan(0),
          mNumElements(0),
          mTempKeys(0),
          mTempValues(0),
          mCounters(0),
          mCountersSum(0),
          mBlockOffsets(0)
    {
        // Allocate temporary storage
        initialize(maxElements, keysOnly);
    }
    protected: // data

    CUDPPHandle   mCudppContext;
    CUDPPHandle   mScanPlan;        // CUDPP plan handle for prefix sum

    unsigned int  mNumElements;     // Number of elements of temp storage allocated
    unsigned int *mTempKeys;        // Intermediate storage for keys
    unsigned int *mTempValues;      // Intermediate storage for values
    unsigned int *mCounters;        // Counter for each radix
    unsigned int *mCountersSum;     // Prefix sum of radix counters
    unsigned int *mBlockOffsets;    // Global offsets of each radix in each block

    protected: // methods

    /**
     * Sets up the sorter for numElements elements.
     *
     * Steps, in order: select device-dependent flags via
     * initDeviceParameters(), record the element count, create the CUDPP
     * context and an exclusive forward add-scan plan over unsigned ints, and
     * allocate the device buffers with cudaMalloc. The temporary value buffer
     * is skipped when keysOnly is true. checkCudaError() is called once at
     * the end.
     *
     * @param numElements  number of elements the temporary buffers must hold.
     * @param keysOnly     true to skip allocating mTempValues.
     */
    void initialize(unsigned int numElements, bool keysOnly)
    {
        // Device-dependent flags come first.
        initDeviceParameters(keysOnly);

        mNumElements = numElements;

        // Round up: number of blocks when each covers CTA_SIZE * 4 elements.
        unsigned int blockCount = numElements / (CTA_SIZE * 4);
        if ((numElements % (CTA_SIZE * 4)) != 0)
        {
            blockCount = numElements / (CTA_SIZE * 4) + 1;
        }

        // Round up: number of blocks when each covers CTA_SIZE * 2 elements.
        unsigned int halfBlockCount = numElements / (CTA_SIZE * 2);
        if ((numElements % (CTA_SIZE * 2)) != 0)
        {
            halfBlockCount = numElements / (CTA_SIZE * 2) + 1;
        }

        // Create the CUDPP context and the prefix-sum plan.
        cudppCreate(&mCudppContext);

        CUDPPConfiguration planConfig;
        planConfig.algorithm = CUDPP_SCAN;
        planConfig.datatype  = CUDPP_UINT;
        planConfig.op        = CUDPP_ADD;
        planConfig.options   = CUDPP_OPTION_EXCLUSIVE | CUDPP_OPTION_FORWARD;
        cudppPlan(mCudppContext , &mScanPlan, planConfig, 16 * halfBlockCount, 1, 0);

        // Per-element buffers; the value buffer is only needed for key/value sorts.
        const size_t elementBytes = numElements * sizeof(unsigned int);
        cudaMalloc((void **)&mTempKeys, elementBytes);
        if (!keysOnly)
        {
            cudaMalloc((void **)&mTempValues, elementBytes);
        }

        // The three counter/offset buffers all share the same size.
        const size_t counterBytes = WARP_SIZE_ * blockCount * sizeof(unsigned int);
        cudaMalloc((void **)&mCounters,     counterBytes);
        cudaMalloc((void **)&mCountersSum,  counterBytes);
        cudaMalloc((void **)&mBlockOffsets, counterBytes);

        checkCudaError("RadixSort::initialize()");
    }
}
4

1 回答 1

1

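问题已解决。虽然系统中已经安装了 CUDA 4.2,但我的 Visual Studio 项目仍在使用 CUDA 4.0 的构建规则和工具。只需修改项目文件,改用 CUDA 4.2 的构建规则和工具,问题就消失了。(很可能是因为用旧版头文件编译出的 cudaDeviceProp 结构体比 4.2 运行时实际写入的要小,于是 cudaGetDeviceProperties 写越界,破坏了 devprop 周围的栈。)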

于 2012-07-26T18:05:24.010 回答