0

我正在尝试在一台装有 CUDA 7.5 和 Nvidia Tesla M2090 的远程计算机上运行 CudaSift 项目中的 cudaSift。这台机器有 4 块这样的 GPU,但我通过调试相当确定初始化是正确完成的。无论如何,初始化代码如下:

void InitCuda(int devNum)
{
  int nDevices;
  cudaGetDeviceCount(&nDevices);
  if (!nDevices) {
    std::cerr << "No CUDA devices available" << std::endl;
    return;
  }
  devNum = std::min(nDevices-1, devNum);
  deviceInit(devNum);  
  cudaDeviceProp prop;
  cudaGetDeviceProperties(&prop, devNum);
  printf("Device Number: %d\n", devNum);
  printf("  Device name: %s\n", prop.name);
  printf("  Memory Clock Rate (MHz): %d\n", prop.memoryClockRate/1000);
  printf("  Memory Bus Width (bits): %d\n", prop.memoryBusWidth);
  printf("  Peak Memory Bandwidth (GB/s): %.1f\n\n",
     2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6);
}

我使用 cmake 生成 Makefile,然后用 make 构建 cudaSift,整个过程没有任何错误。

无论如何,当我运行它时,会返回以下错误:

safeCall() Runtime API error in file </ghome/rzhengac/Downloads/CudaSift-Maxwell/cudaSiftH.cu>, line 42 : invalid device symbol.

cudaSiftH.cu 的第 42 行是:

// Copies sizeof(int) bytes from totPts into the device symbol d_PointCounter;
// safeCall is the wrapper that reported the "invalid device symbol" error.
safeCall(cudaMemcpyToSymbol(d_PointCounter, &totPts, sizeof(int)));

其中:

// One-element counter declared with __device__; it is the destination symbol
// of the cudaMemcpyToSymbol call.
__device__ unsigned int d_PointCounter[1];
// Source value for that copy (presumably a host-side variable; confirm in cudaSiftH.cu).
int totPts = 0;

这是 InitCuda 打印的输出(用于确认初始化期间一切正常):

Device Number: 0
  Device name: Tesla M2090
  Memory Clock Rate (MHz): 1848
  Memory Bus Width (bits): 384
  Peak Memory Bandwidth (GB/s): 177.4

解决方案:

正如评论中所建议的,我使用了错误的架构进行编译:我不得不把 CMakeLists.txt 中的每一处 sm_35 都改为 sm_20,结果如下:

# Build script for the cudasift executable (CUDA + OpenCV).
cmake_minimum_required(VERSION 2.6)

project(cudaSift)
set(cudaSift_VERSION_MAJOR 2)
set(cudaSift_VERSION_MINOR 0)
set(cudaSift_VERSION_PATCH 0)

# Packaging: produce a ZIP archive carrying the project version.
set(CPACK_PACKAGE_VERSION_MAJOR "${cudaSift_VERSION_MAJOR}")
set(CPACK_PACKAGE_VERSION_MINOR "${cudaSift_VERSION_MINOR}")
set(CPACK_PACKAGE_VERSION_PATCH "${cudaSift_VERSION_PATCH}")
set(CPACK_GENERATOR "ZIP")
include(CPack)

find_package(OpenCV REQUIRED)
find_package(CUDA)
if (NOT CUDA_FOUND)
  message(STATUS "CUDA not found. Project will not be built.")
  # Stop here: cuda_add_executable() below is only defined when FindCUDA
  # succeeded, so continuing would abort configuration with an unknown-command
  # error instead of skipping the build as the message promises.
  return()
endif(NOT CUDA_FOUND)

# GPU architecture handed to nvcc. It must match the compute capability of the
# target GPU; a mismatch is what produced the "invalid device symbol" runtime
# error at cudaMemcpyToSymbol. Override with e.g. -DCUDA_ARCH=sm_35.
set(CUDA_ARCH "sm_20" CACHE STRING "GPU architecture passed to nvcc via -arch")

if (WIN32)
  set(EXTRA_CXX_FLAGS "/DVERBOSE /D_CRT_SECURE_NO_WARNINGS ")
  list(APPEND CUDA_NVCC_FLAGS "-arch=${CUDA_ARCH};--compiler-options;-O2;-DVERBOSE")
endif()
if (UNIX)
  if (APPLE)
    set(EXTRA_CXX_FLAGS "-DVERBOSE -msse2")
    list(APPEND CUDA_NVCC_FLAGS "-arch=${CUDA_ARCH};--compiler-options;-O2;-DVERBOSE")
  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -msse2 ")
    # No -arch here: on this platform it is supplied only through the OPTIONS
    # argument of cuda_add_executable() below.
    list(APPEND CUDA_NVCC_FLAGS "-lineinfo;--compiler-options;-O2;-DVERBOSE")
  endif()
endif()

# Sources compiled by nvcc (headers are listed so they are installed with them).
set(cuda_sources
  # dynamic.cu
  cudaImage.cu
  cudaImage.h
  cudaSiftH.cu
  cudaSiftH.h
  matching.cu
  cudaSiftD.h
  cudaSift.h
  cudautils.h
)

# Sources compiled by the host C++ compiler.
set(sources
  geomFuncs.cpp
  mainSift.cpp
)

include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
)

SET(CUDA_SEPARABLE_COMPILATION ON)

cuda_add_executable(cudasift ${cuda_sources} ${sources} OPTIONS -arch=${CUDA_ARCH})

set_target_properties(cudasift PROPERTIES
  COMPILE_FLAGS "${EXTRA_CXX_FLAGS}"
)

# NOTE(review): the device runtime library path is hard-coded to the default
# Linux install location; adjust it if CUDA is installed elsewhere.
target_link_libraries(cudasift
  /usr/local/cuda/lib64/libcudadevrt.a ${OpenCV_LIBS}
)

install(FILES
  ${cuda_sources}
  ${sources}
  cudaSiftD.cu
  CMakeLists.txt
  Copyright.txt
  DESTINATION .
)
install(FILES data/left.pgm data/righ.pgm
  DESTINATION data
)
4

0 回答 0