我正在使用cudaMallocPitch
andcudaMemcpy2D
用于二维数组。即使我无法正确获得输出,我也不确定我的编码是否正确。有人可以帮忙吗?任何人都可以调试我的错误吗?提前致谢。
#include<stdio.h>
#include<cuda.h>
#define siz 4*sizeof(int)
__global__ void addmatrix(int *m1,int *m2,size_t pitch)
{
int r=threadIdx.x;
int *r1=m1+r*pitch;
int *r2=m2+r*pitch;
int c;
for(c=1;c<=4;c++)
{
r1[c]+=r2[c];
}
}
int main()
{
int i,j;
int **m1_c,**m2_c;
int *m1_d,*m2_d;
size_t pitch;
cudaError_t err;
m1_c=(int **)malloc(4*sizeof(int *));
for(i=1;i<=4;i++)
{
m1_c[i]=(int *)malloc(siz);
}
m2_c=(int **)malloc(4*sizeof(int *));
for(i=1;i<=4;i++)
{
m2_c[i]=(int *)malloc(siz);
}
for(i=1;i<=4;i++)
{
for(j=1;j<=4;j++)
{
m1_c[i][j]=rand()%10;
m2_c[i][j]=rand()%10;
}
}
for(i=1;i<=4;i++)
{
for(j=1;j<=4;j++)
{
printf("%d\t",m1_c[i][j]);
}
printf("\n");
}
printf("\n\n");
for(i=1;i<=4;i++)
{
for(j=1;j<=4;j++)
{
printf("%d\t",m2_c[i][j]);
}
printf("\n");
}
err=cudaMallocPitch((void **)&m1_d,&pitch,siz,siz);
err=cudaMallocPitch((void **)&m2_d,&pitch,siz,siz);
err=cudaMemcpy2D(m1_d,pitch,m1_c,siz,siz,4,cudaMemcpyHostToDevice);
err=cudaMemcpy2D(m2_d,pitch,m2_c,siz,siz,4,cudaMemcpyHostToDevice);
dim3 grid(1);
dim3 block(16);
addmatrix<<<grid,block>>>(m1_d,m2_d,siz);
cudaMemcpy2D(m1_c,siz,m1_d,pitch,siz,4,cudaMemcpyDeviceToHost);
for(i=1;i<=4;i++)
{
for(j=1;j<=4;j++)
{
printf("%d\t",m1_c[i][j]);
}
printf("\n");
}
err=cudaFree(m1_d);
err=cudaFree(m2_d);
err=cudaDeviceReset();
}