我尝试使用 CUDA 编写 GPU 程序。以下是我的功能:
__global__ static void
histogram_gpu(int * hist_out, unsigned char * img_in, int img_size, int nbr_bin){
int i;
const int bid = blockIdx.x;
const int tid = threadIdx.x;
// for ( i = 0; i < img_size; i ++){
// hist_out[img_in[i]] ++;
// }
for (i = bid*THREAD_NUM + tid; i < img_size; i += BLOCK_NUM*THREAD_NUM) {
hist_out[img_in[i]]++;
}
}
当我在主函数中调用此函数时,会发生错误:
error: ‘blockIdx’ was not declared in this scope
我在我的 MAC 机器上使用 CUDA 5.0,下面是 Makefile:
OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:])
OSLOWER = $(shell uname -s 2>/dev/null | tr [:upper:] [:lower:])
# Flags to detect 32-bit or 64-bit OS platform
OS_SIZE = $(shell uname -m | sed -e "s/i.86/32/" -e "s/x86_64/64/")
OS_ARCH = $(shell uname -m | sed -e "s/i386/i686/")
# These flags will override any settings
ifeq ($(i386),1)
OS_SIZE = 32
OS_ARCH = i686
endif
ifeq ($(x86_64),1)
OS_SIZE = 64
OS_ARCH = x86_64
endif
# Flags to detect either a Linux system (linux) or Mac OSX (darwin)
DARWIN = $(strip $(findstring DARWIN, $(OSUPPER)))
# Location of the CUDA Toolkit binaries and libraries
CUDA_PATH ?= /Developer/NVIDIA/CUDA-5.0
CUDA_INC_PATH ?= $(CUDA_PATH)/include
CUDA_BIN_PATH ?= $(CUDA_PATH)/bin
ifneq ($(DARWIN),)
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib
else
ifeq ($(OS_SIZE),32)
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib
else
CUDA_LIB_PATH ?= $(CUDA_PATH)/lib64
endif
endif
# Common binaries
NVCC ?= $(CUDA_BIN_PATH)/nvcc
GCC ?= g++
# Extra user flags
EXTRA_NVCCFLAGS ?=
EXTRA_LDFLAGS ?=
# CUDA code generation flags
GENCODE_SM10 := -gencode arch=compute_10,code=sm_10
GENCODE_SM20 := -gencode arch=compute_20,code=sm_20
GENCODE_SM30 := -gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35
GENCODE_FLAGS := $(GENCODE_SM10) $(GENCODE_SM20) $(GENCODE_SM30)
GENCODE_FLAGS := $(GENCODE_SM10) $(GENCODE_SM20) $(GENCODE_SM30)
# OS-specific build flags
# ifneq ($(DARWIN),)
# LDFLAGS := -Xlinker -rpath $(CUDA_LIB_PATH) -L$(CUDA_LIB_PATH) -lcudart -lcublas -lcuda -lcufft -ltlshook
# CCFLAGS := -arch $(OS_ARCH)
# else
# ifeq ($(OS_SIZE),32)
# LDFLAGS := -L$(CUDA_LIB_PATH) -lcudart
# CCFLAGS := -m32
# else
LDFLAGS := -L$(CUDA_LIB_PATH) -lcudart -lcublas -lcuda -lcufft -ltlshook
CCFLAGS := -m64
# endif
# endif
# OS-architecture specific flags
ifeq ($(OS_SIZE),32)
NVCCFLAGS := -m32
else
NVCCFLAGS := -m64
endif
# Debug build flags
ifeq ($(dbg),1)
CCFLAGS += -g
NVCCFLAGS += -g -G
TARGET := debug
else
TARGET := release
endif
# Common includes and paths for CUDA
INCLUDES := -I$(CUDA_INC_PATH) -I. -I.. -I../../common/inc
# Add source files here
EXECUTABLE := 5kk70-assignment-gpu
# Cuda source files (compiled with cudacc)
CUFILES :=
# C/C++ source files (compiled with gcc / c++)
CCFILES := main.cpp histogram-equalization.cu contrast-enhancement.cu
################################################################################
# Rules and targets
# All Phony Targets
.PHONY : everything clean
# Default starting position
everything : $(EXECUTABLE)
# Common includes and paths for CUDA
# INCLUDES := -I$(CUDA_INC_PATH) -I. -I.. -I$(CUDA_INC_PATH)/samples/common/inc/
# Clean OBJECTS
clean :
rm -f $(EXECUTABLE) $(OBJ)
$(EXECUTABLE) : $(CCFILES)
$(NVCC) -o $@ $^ $(INCLUDES) $(LDFLAGS) $(EXTRA_LDFLAGS) $(GENCODE_FLAGS)
我的代码有什么问题?