我写了一个关于字符串比较的简单代码。代码如下所示。这很简单。只需比较字符串a和字符串b,如果对应的元素相同,则将5赋给新的矩阵s;如果对应的元素不同,则将-3赋给新的矩阵s。不存在编译错误。但结果不是我所期望的。请给我一些有用的建议。谢谢!
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include "book.h"
#define M 6
#define BLOCK_SIZE 30 // maximum 1024 threads per block
#define GRID_SIZE 30 // 900 blocks per grid
#define P (900 * 900)
void Init();
char *gpu_a;
char *gpu_b;
float *gpu_s;
float *cpu_s;
char cpu_a[6] = {'A', 'T', 'G', 'C', 'G', 'T'};
char cpu_b[6] = {'G', 'T', 'G', 'A', 'T', 'G'};
void cpu_Allocate1dArray()
{
//cpu_a = (char*) malloc( M * sizeof( char) );
//cpu_b = (char*) malloc( M * sizeof(char) );
cpu_s = (float*) malloc( M * sizeof( float) );
}
void gpu_Allocate1dArray()
{
cudaMalloc( (void**)&gpu_a, M * sizeof(char) );
cudaMalloc( (void**)&gpu_b, M * sizeof(char) );
cudaMalloc( (void**)&gpu_s, M * sizeof(float));
}
__global__ void mykernel( char *gpu_a, char *gpu_b, float *gpu_s)
{
int i , j , tid;
i = threadIdx.x + blockIdx.x * blockDim.x;
j = threadIdx.y + blockIdx.y * blockDim.y;
tid = i + j * blockDim.x * gridDim.x;
if ( tid < P)
{
if( gpu_a[i] == gpu_b[j])
{
gpu_s[tid] = 5;
}
else
gpu_s[tid] = -3;
}
}
int main()
{
int q;
cpu_Allocate1dArray();
gpu_Allocate1dArray();
Init();
dim3 gridDim;
dim3 blockDim;
blockDim.x = blockDim.y = BLOCK_SIZE;
gridDim.x = gridDim.y = GRID_SIZE;
cudaMemcpy( gpu_a, cpu_a, sizeof(char) * M, cudaMemcpyHostToDevice);
cudaMemcpy( gpu_b, cpu_b, sizeof(char) * M, cudaMemcpyHostToDevice);
mykernel<<<gridDim, blockDim>>>(gpu_a, gpu_b, gpu_s);
cudaMemcpy( cpu_s, gpu_s, sizeof(float)* M, cudaMemcpyDeviceToHost);
for (q = 0; q < M; q++)
printf("%f ", cpu_s[q]);
printf("\n");
//Free device memory
free(cpu_s);
cudaFree(gpu_s);
cudaFree(gpu_a);
cudaFree(gpu_b);
return 0;
}
void Init()
{
int i;
for (i = 0; i < M; i++)
cpu_s[i] = 0;
}
结果是: [Smith@server]$ ./test88.exe 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000