我正在尝试将一个大量使用 C++ 模板的 CUDA 项目移植到 ROCm 平台。在此过程中，我遇到了以下编译错误：
/root/warp-ctc/include/detail/gpu_ctc.h:381:5: error: no matching function for call to 'hipLaunchKernelGGL'
hipLaunchKernelGGL((prepare_stable_SM_kernel<ProbT, VT>), dim3(grid_size), dim3(NT), 0, stream_, ctc_helper::identity<ProbT>(), probs_,
^~~~~~~~~~~~~~~~~~
.....
.....
/opt/rocm/hip/include/hip/hcc_detail/functional_grid_launch.hpp:138:13: note: candidate function [with Args = <ctc_helper::identity<float, float>, float *,
float *, int, int>, F = void (*)(ctc_helper::identity<float, float>, float *, float *, int, int)] not viable: no overload of 'prepare_stable_SM_kernel'
matching 'void (*)(ctc_helper::identity<float, float>, float *, float *, int, int)' for 1st argument
inline void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
// Failing call site: launches prepare_stable_SM_kernel<ProbT, VT> through
// hipLaunchKernelGGL. Only ProbT and VT are given explicitly; the kernel's
// third template parameter (Op) is left to be resolved from the call.
// NOTE(review): the diagnostic reports the expected kernel type as
// void (*)(ctc_helper::identity<float, float>, float *, float *, int, int),
// i.e. built from the types of the trailing arguments below — so denoms_
// appears to be a non-const `float *` here; confirm against its declaration.
hipLaunchKernelGGL((prepare_stable_SM_kernel<ProbT, VT>),
dim3(grid_size),  // numBlocks
dim3(NT),  // dimBlocks
0,  // sharedMemBytes
stream_,  // stream
ctc_helper::identity<ProbT>(),  // kernel argument: Op f
probs_,  // kernel argument: ProbT* probs
denoms_,  // kernel argument: const ProbT* const col_max
out_dim_,  // kernel argument: int alphabet_size
num_elements);  // kernel argument: int count
其中 hipLaunchKernelGGL 的定义如下：
/// Kernel-launch wrapper as quoted from the HIP headers; the body is elided
/// in this excerpt, so only the signature is documented here.
///
/// @tparam Args  Types of the trailing kernel arguments `args`.
/// @tparam F     Type of `kernel`; defaults to `void (*)(Args...)`. In the
///               diagnostic quoted earlier, F is reported as exactly that
///               function-pointer type built from the call's argument types.
/// @param kernel          Kernel to launch.
/// @param numBlocks       Grid dimensions (presumably in blocks — name-based).
/// @param dimBlocks       Block dimensions (presumably in threads — name-based).
/// @param sharedMemBytes  Presumably the dynamic shared-memory size in bytes.
/// @param stream          Stream associated with the launch.
/// @param args            Arguments for the kernel, taken by value.
template <typename... Args, typename F = void (*)(Args...)>
inline void hipLaunchKernelGGL(F kernel,
const dim3& numBlocks,
const dim3& dimBlocks,
std::uint32_t sharedMemBytes,
hipStream_t stream,
Args... args) {
// ...
// ...
}
而 prepare_stable_SM_kernel 的定义如下：
/// `__global__` kernel template; the body is elided in this excerpt, so its
/// computation is not documented here.
///
/// @tparam ProbT  Element type of `probs` and `col_max`.
/// @tparam VT     Integer template parameter, defaults to 1 (meaning not
///                shown in this excerpt).
/// @tparam Op     Type of the functor `f`; it follows a defaulted parameter
///                and is not named in the call's template-id, so it must be
///                deduced.
///
/// NOTE(review): `col_max` is declared `const ProbT*`, so with ProbT = float
/// this kernel's third parameter is `const float *`, whereas the diagnostic
/// shows the launcher looking for `float *` in that position. This looks
/// like the reason no overload matches — confirm, e.g. by passing a
/// `const ProbT*` argument at the call site.
template <typename ProbT, int VT = 1, typename Op>
__global__ void prepare_stable_SM_kernel(Op f, ProbT* probs,
const ProbT* const col_max,
int alphabet_size,
int count) {
// ...
}
希望有人能提供一些解决此问题的提示，非常感谢。