我不太清楚如何在 CUDA 内核(kernel)中遍历三维数组。
以下是我目前写出的代码:
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <cuda.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
using namespace std;
// Reports a failed CUDA runtime call on stderr.
//
// status: return code of the CUDA runtime call being checked.
// what:   short label for the call, used in the error message.
// Returns true when status is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess) {
        return true;
    }
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

// Copies one densely packed host volume of floats into a pitched device
// allocation.
//
// hostData:   first element of a host array laid out as [depth][height][width].
// deviceData: destination returned by cudaMalloc3D for the same extent.
// width:      number of floats per row.
// height:     number of rows per slice.
// extent:     copy extent (row width in bytes, rows, slices).
// Returns the status of cudaMemcpy3D.
static cudaError_t copyVolumeToDevice(float* hostData, cudaPitchedPtr deviceData,
                                      size_t width, size_t height, cudaExtent extent)
{
    // The parameter struct must be zero-initialised: leftover garbage in
    // srcArray/dstArray/srcPos/dstPos makes cudaMemcpy3D reject the request.
    cudaMemcpy3DParms params = {0};
    // Host rows are not padded, so the pitch is exactly one row of floats;
    // xsize/ysize are the logical width and height of a slice.
    params.srcPtr = make_cudaPitchedPtr(hostData, width * sizeof(float), width, height);
    params.dstPtr = deviceData;
    params.extent = extent;
    params.kind = cudaMemcpyHostToDevice;
    return cudaMemcpy3D(&params);
}

// Fills two host volumes with random values, allocates three pitched device
// volumes and uploads h_A, h_B and the zeroed h_C to them.
// Returns 0 on success and 1 if any CUDA runtime call failed.
int main()
{
    // Array properties
    const int width = 1;
    const int height = 1;
    const int depth = 1;

    // Declaration of arrays.
    // Indexed [z][y][x] so that x (width) is the contiguous dimension, which is
    // the row that the extent and pitch below describe.
    float h_A[depth][height][width];
    float h_B[depth][height][width];
    float h_C[depth][height][width] = {{{0}}};

    // Fill up arrays
    srand(time(0));
    for (int z = 0; z < depth; z++) {
        for (int y = 0; y < height; y++) {
            for (int x = 0; x < width; x++) {
                h_A[z][y][x] = rand() % 1000;
                h_B[z][y][x] = rand() % 1000;
            }
        }
    }

    // Declaration of device pointers; zeroed so that the cleanup below is safe
    // even when an allocation never happened (cudaFree ignores a null pointer).
    cudaPitchedPtr d_A = {0};
    cudaPitchedPtr d_B = {0};
    cudaPitchedPtr d_C = {0};

    // Allocating memory in GPU and copying memory from host to device.
    // The chain short-circuits at the first failing call.
    const cudaExtent extent = make_cudaExtent(width * sizeof(float), height, depth);
    bool ok =
        cudaSucceeded(cudaMalloc3D(&d_A, extent), "cudaMalloc3D(d_A)") &&
        cudaSucceeded(cudaMalloc3D(&d_B, extent), "cudaMalloc3D(d_B)") &&
        cudaSucceeded(cudaMalloc3D(&d_C, extent), "cudaMalloc3D(d_C)") &&
        cudaSucceeded(copyVolumeToDevice(&h_A[0][0][0], d_A, width, height, extent),
                      "cudaMemcpy3D(h_A -> d_A)") &&
        cudaSucceeded(copyVolumeToDevice(&h_B[0][0][0], d_B, width, height, extent),
                      "cudaMemcpy3D(h_B -> d_B)") &&
        cudaSucceeded(copyVolumeToDevice(&h_C[0][0][0], d_C, width, height, extent),
                      "cudaMemcpy3D(h_C -> d_C)");

    // Release device memory on every path; each free is attempted regardless
    // of earlier failures.
    ok = cudaSucceeded(cudaFree(d_A.ptr), "cudaFree(d_A)") && ok;
    ok = cudaSucceeded(cudaFree(d_B.ptr), "cudaFree(d_B)") && ok;
    ok = cudaSucceeded(cudaFree(d_C.ptr), "cudaFree(d_C)") && ok;

    system("pause");
    return ok ? 0 : 1;
}
如何编写一个内核,遍历这些数组中的每个元素并将它们相加?