我在为点对点通信设置 2 个 GPU 时遇到了一些困难。我正在使用 Cuda 4.0 并使用 fortran 进行编程。PGI 编译器
我编写了一个程序,确认我的节点上有 4 个 GPU 可用。
我决定使用其中两个,但出现以下错误: 0: DEALLOCATE: invalid device pointer。
subroutine directTransfer()
use cudafor
implicit none
integer, parameter :: N = 4*1024*1024
real, pinned, allocatable :: a(:), b(:)
real, device, allocatable :: a_d(:), b_d(:)
!these hold free and total memory before and after
!allocation, used to verify allocation happening on proper devices
integer (int_ptr_kind()),allocatable ::
& freeBefore(:), totalBefore(:),
& freeAfter(:), totalAfter(:)
integer :: istat, nDevices, i, accessPeer, timingDev
type(cudaDeviceProp)::prop
type(cudaEvent)::startEvent,stopEvent
real :: time
!allocate host arrays
allocate(a(N), b(N))
allocate(freeBefore(0:nDevices -1),
& totalBefore(0:nDevices -1))
allocate(freeAfter(0:nDevices -1),
& totalAfter(0:nDevices -1))
write(*,*) 'Start!'
!get devices ionfo (including total and free memory)
!before allocation
istat = cudaGetDeviceCount(nDevices)
if(nDevices < 2) then
write(*,*) 'Need at least two CUDA capable devices'
stop
end if
write(*,"('Number of CUDA-capable devices: ',
& i0, /)"),nDevices
do i = 0, nDevices - 1
istat = cudaGetDeviceProperties(prop, i)
istat = cudaSetDevice(i)
istat = cudaMemGetInfo(freeBefore(i), totalBefore(i))
end do
!!!Here is the trouble zone!!!!
istat = cudaSetDevice(0)
allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
deallocate(freeBefore, totalBefore,freeAfter,totalAfter)
deallocate(a,b,a_d,b_d)
end subroutine directTransfer
有了以下我没有错误:
istat = cudaSetDevice(0)
allocate(a_d(N))
!istat = cudaSetDevice(1)
!allocate(b_d(N))
有了这个,也没有错误:
!istat = cudaSetDevice(0)
!allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
但是这个返回错误
istat = cudaSetDevice(0)
allocate(a_d(N))
istat = cudaSetDevice(1)
allocate(b_d(N))
所以看来我无法设置 2GPU 来启动我的程序。你能帮我理解为什么不能设置 2GPU 并提示解决这个问题吗?