0

我需要编译一个使用 cmake 动态并行的 CUDA 代码。代码是:

#include <stdio.h>

__global__ void childKernel() {
    printf("Hello ");    
}

__global__ void parentKernel() {    
    childKernel<<<1,1>>>();
    cudaDeviceSynchronize();    
    printf("World!\n");    
}

int main(int argc, char **argv){

    parentKernel<<<1, 1>>>();

    return 0;
}

并且cmake如下:

cmake_minimum_required(VERSION 2.8)

find_package(CUDA QUIET REQUIRED)

include_directories(/usr/include)
include_directories(/usr/local/cuda/lib)
include_directories(/usr/local/cuda-8.0/lib)
include_directories(/usr/local/cuda/include)
include_directories(/usr/local/cuda-8.0/include)

set(CUDA_SEPARABLE_COMPILATION ON)
find_package(CUDA QUIET REQUIRED)
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

set(
    CUDA_NVCC_FLAGS 
    ${CUDA_NVCC_FLAGS}; 
    -arch=compute_35 -rdc=true -lcudadevrt
)

cuda_add_executable(
    prova 
    test.cu
)

我尝试使用传递的 nvcc 直接编译代码-arch=compute_35 -rdc=true -lcudadevrt并且它编译完美,但是当我尝试使用 cmake 编译时它返回以下错误:

CMakeFiles/prova.dir/prova_intermediate_link.o: In function `__cudaRegisterLinkedBinary_66_tmpxft_00001101_00000000_13_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37':
link.stub:(.text+0xcc): undefined reference to `__fatbinwrap_66_tmpxft_00001101_00000000_13_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37'
link.stub:(.text+0xd0): undefined reference to `__fatbinwrap_66_tmpxft_00001101_00000000_13_cuda_device_runtime_compute_62_cpp1_ii_8b1a5d37'
collect2: error: ld returned 1 exit status
CMakeFiles/prova.dir/build.make:200: recipe for target 'prova' failed
make[2]: *** [prova] Error 1
CMakeFiles/Makefile2:67: recipe for target 'CMakeFiles/prova.dir/all' failed
make[1]: *** [CMakeFiles/prova.dir/all] Error 2
Makefile:83: recipe for target 'all' failed
make: *** [all] Error 2
4

2 回答 2

1

您看到的未定义符号在libcudadevrt.a. 我看到您告诉 CMake 链接到它,但编译器似乎找不到它。在命令行上试试这个:

VERBOSE=1 make

并检查输出以查看您是否正在搜索/usr/local/cuda/lib64库。

这可能无关,但我也注意到您正在指示 CMake 搜索/usr/local/cuda/lib/usr/local/cuda-8.0/lib查找头文件。这是不正确的,因为这些目录只包含库。

于 2017-11-20T22:07:59.243 回答
1

感谢您的回答,我尝试使用 Tegra X2 (compute_62) 的正确计算能力,并检查了详细 make 的输出,发现以下输出:

CMakeFiles/prova.dir/prova_generated_test.cu.o CMakeFiles/prova.dir/prova_intermediate_link.o  -o prova -rdynamic /usr/local/cuda-8.0/lib64/libcudart_static.a -lpthread -ldl -lrt

似乎链接器尝试搜索 /usr/local/cuda-8.0/lib64/libcudart_static.a 但不是 libcudadevrt.a

于 2017-11-21T09:55:12.077 回答