1

我在用 CUDA C 编写内核时遇到了问题:编译到内核的第 5 行时,出现了"expected an identifier"错误。为什么会这样?

我的内核函数如下:

__global__ void txz_kernel(float *txz,float *vz) // adds vz into txz in place, one element per thread
{
    int x = threadIdx.x + blockIdx.x * blockDim.x; // global x index of this thread
    int y = threadIdx.y + blockIdx.y * blockDim.y; // global y index of this thread
    int offset = x + y * blockDim.x * gridDim.x; // --> error ("expected an identifier" is reported on this line)
    txz[offset]=txz[offset]+vz[offset]; // no bounds check is applied to offset
}

这是完整的代码:

#include  "../common/book.h"
#include "conio.h"
#include "cuda.h"
#include <fstream>
#include <sstream>
#include <iostream>
#include <assert.h>
#include "../common/book.h"
#include <fstream>
#define DIMX 320
#define DIMZ 320
#define PI 3.1415926535897932f

/*
 * txz_kernel: element-wise in-place accumulation, txz[k] += vz[k].
 *
 * Each thread handles exactly one element. The element index is built from a
 * 2D launch: (x, y) are the thread's global coordinates and the row width used
 * for flattening is blockDim.x * gridDim.x (the total number of threads in x).
 *
 * txz  device pointer, read and written
 * vz   device pointer, read only
 *
 * There is no bounds check on the computed index, so every launched thread
 * reads and writes memory; the launch must not cover more elements than the
 * two buffers hold.
 */
__global__ void txz_kernel(float *txz,float *vz)
{
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    int offset = x + y * blockDim.x * gridDim.x; // --> error (line the asker flagged as failing to compile)
    txz[offset]=txz[offset]+vz[offset];
}

/*
 * Host driver for txz_kernel.
 *
 * Allocates two DIMX*DIMZ float buffers on the device, fills matching host
 * staging buffers (rows j < 121 get vz=200 / txz=100, rows j >= 121 get
 * vz=250 / txz=150), uploads them, and launches txz_kernel on a 2D grid of
 * 16x16-thread blocks.
 *
 * The result is left in device memory and is not copied back to the host.
 *
 * Returns 0 on success, 1 if a host allocation fails. CUDA API failures are
 * passed to HANDLE_ERROR.
 */
int  main( void ) {
    const size_t bytes    = sizeof(float)*(DIMX*DIMZ);
    const int    splitRow = 121;   // first row that receives the second pair of values
    const int    tile     = 16;    // threads per block along each axis

    // The kernel has no bounds check, so the grid must tile the array exactly.
    assert( DIMX % tile == 0 && DIMZ % tile == 0 );

    float *txz = NULL;
    float *vz  = NULL;
    HANDLE_ERROR( cudaMalloc( (void**)&txz, bytes ) );
    HANDLE_ERROR( cudaMalloc( (void**)&vz, bytes ) );

    float *tempvz  = (float*)malloc( bytes );
    float *temptxz = (float*)malloc( bytes );
    if (tempvz == NULL || temptxz == NULL) {
        std::cerr << "host allocation failed" << std::endl;
        free( tempvz );
        free( temptxz );
        cudaFree( vz );
        cudaFree( txz );
        return 1;
    }

    // Single pass over the host buffers; the value depends only on the row j.
    for (int j=0; j<DIMZ; j++) {
        const bool  pastSplit = (j >= splitRow);
        const float vzValue   = pastSplit ? 250.0f : 200.0f;
        const float txzValue  = pastSplit ? 150.0f : 100.0f;
        for (int i=0; i<DIMX; i++) {
            const int ij = DIMX*j + i;
            tempvz[ij]  = vzValue;
            temptxz[ij] = txzValue;
        }
    }

    HANDLE_ERROR( cudaMemcpy( vz, tempvz, bytes, cudaMemcpyHostToDevice ) );
    HANDLE_ERROR( cudaMemcpy( txz, temptxz, bytes, cudaMemcpyHostToDevice ) );

    dim3    blocks(DIMX/tile,DIMZ/tile);
    dim3    threads(tile,tile);
    txz_kernel<<<blocks,threads>>>(txz,vz);
    // A launch does not return a status: query it explicitly, then wait for
    // the kernel so that execution errors are reported before cleanup.
    HANDLE_ERROR( cudaGetLastError() );
    HANDLE_ERROR( cudaDeviceSynchronize() );

    free( tempvz );
    free( temptxz );
    HANDLE_ERROR( cudaFree( vz ) );
    HANDLE_ERROR( cudaFree( txz ) );
    return 0;
}
4

1 回答 1

4

您必须把这段代码放在扩展名为 .cu 的文件中,并使用 nvcc 进行编译。nvcc 根据文件扩展名来决定每个输入文件的编译流程:如果文件的扩展名不是 .cu,nvcc 会认为其中不包含设备代码,并直接把它交给主机编译器。代码本身没有错,只是您没有用正确的方式编译它。

让我们先把内核代码放在一个 .cpp 文件中:

> type txzkernel.cpp
__global__ void txz_kernel(float *txz, float *vz) // adds vz into txz in place, one element per thread
{
    int x = threadIdx.x + blockIdx.x * blockDim.x; // global x index of this thread
    int y = threadIdx.y + blockIdx.y * blockDim.y; // global y index of this thread
    int offset = x + y * blockDim.x * gridDim.x; // flatten (x, y); row width is blockDim.x * gridDim.x
    txz[offset]=txz[offset]+vz[offset]; // no bounds check is applied to offset
}

现在让我们尝试用 nvcc 编译它:

> nvcc -arch=sm_20 -Xptxas="-v" -c txzkernel.cpp
txzkernel.cpp
txzkernel.cpp(1) : error C2144: syntax error : 'void' should be preceded by ';'
txzkernel.cpp(1) : error C4430: missing type specifier - int assumed. Note: C++
does not support default-int
txzkernel.cpp(3) : error C2065: 'threadIdx' : undeclared identifier
txzkernel.cpp(3) : error C2228: left of '.x' must have class/struct/union
        type is ''unknown-type''
txzkernel.cpp(3) : error C2065: 'blockIdx' : undeclared identifier
txzkernel.cpp(3) : error C2228: left of '.x' must have class/struct/union
        type is ''unknown-type''
txzkernel.cpp(3) : error C2065: 'blockDim' : undeclared identifier
txzkernel.cpp(3) : error C2228: left of '.x' must have class/struct/union
        type is ''unknown-type''
txzkernel.cpp(4) : error C2065: 'threadIdx' : undeclared identifier
txzkernel.cpp(4) : error C2228: left of '.y' must have class/struct/union
        type is ''unknown-type''
txzkernel.cpp(4) : error C2065: 'blockIdx' : undeclared identifier
txzkernel.cpp(4) : error C2228: left of '.y' must have class/struct/union
        type is ''unknown-type''
txzkernel.cpp(4) : error C2065: 'blockDim' : undeclared identifier
txzkernel.cpp(4) : error C2228: left of '.y' must have class/struct/union
        type is ''unknown-type''
txzkernel.cpp(5) : error C2065: 'blockDim' : undeclared identifier
txzkernel.cpp(5) : error C2228: left of '.x' must have class/struct/union
        type is ''unknown-type''
txzkernel.cpp(5) : error C2065: 'gridDim' : undeclared identifier
txzkernel.cpp(5) : error C2228: left of '.x' must have class/struct/union
        type is ''unknown-type''

结果是很多语法错误,因为主机编译器(在本例中为 Microsoft Visual C++)不理解内核代码中的任何 CUDA 语言扩展。

现在重命名文件,使其具有正确的扩展名并再次编译:

> rename txzkernel.cpp txzkernel.cu
> nvcc -arch=sm_20 -Xptxas="-v" -c txzkernel.cu
txzkernel.cu
tmpxft_000012dc_00000000-3_txzkernel.cudafe1.gpu
tmpxft_000012dc_00000000-8_txzkernel.cudafe2.gpu
txzkernel.cu
ptxas info    : Compiling entry function '_Z10txz_kernelPfS_' for 'sm_20'
ptxas info    : Function properties for _Z10txz_kernelPfS_
    0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads
ptxas info    : Used 5 registers, 40 bytes cmem[0]
tmpxft_000012dc_00000000-3_txzkernel.cudafe1.cpp
tmpxft_000012dc_00000000-14_txzkernel.ii

没有错误。如果您使用 nvcc 编译内核代码并将内核代码放入具有适当扩展名的文件中,则此代码将无需修改即可编译。

于 2013-07-04T07:35:40.357 回答