我正在使用以下代码来测试 CUDA NPP min-max 函数。
#include <string.h>
#include <fstream>
#include <iostream>
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string>
#include <math.h>
#include <assert.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuComplex.h"
#include <cufft.h>
#include <cuda_runtime.h>
#include <npp.h>
#define Nz 256
#define Ny 280
int main(int argc, char** argv) {
struct cudaDeviceProp p;
cudaGetDeviceProperties(&p, 0);
printf("Device Name: %s\n", p.name);
Npp32f* d_img;
cudaMalloc((void**)&d_img, Nz*Ny * sizeof(Npp32f));
nppsSet_32f(1.0f, d_img, Nz*Ny);
int BufferSize;
Npp32f Max;
Npp32f Min;
nppsMinMaxGetBufferSize_32f(Nz*Ny,&BufferSize);
Npp8u *pScratch;
cudaMalloc((void **)(&pScratch), BufferSize);
nppsMinMax_32f(d_img,Nz*Ny,&Min,&Max,pScratch);
printf("Max:%g, Min:%g\n", (float)Max, (float)Min);
cudaFree(d_img);
cudaFree(pScratch);
}
设备数组中的所有元素都被设置为 1,因此预期 Max 和 Min 都应为 1,但我得到以下输出。
Max:1.12104e-44, Min:0