__host__ void generateVector(int count) {
A = new int[count];
B = new int[count];
for (int i = 0; i < count; i++) {
A[i] = rand_from_0_to_100_gen();
B[i] = rand_from_0_to_100_gen();
}
}
我在 CPU 端创建了数组,并使用此函数尝试对这两个数组求和:
__host__ void vectorSum(const int *dA, const int* dB, int count, int* dC){
cudaMalloc((void**) &dA, count * sizeof(int));
cudaMalloc((void**) &dB, count * sizeof(int));
cudaMemcpy(A, dA , count * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(B, dA , count * sizeof(int), cudaMemcpyHostToDevice);
int tid = 0;
while(tid < count){
tid++;
dC[tid] = dA[tid] + dB[tid];
}
cout << "C: {";
for (int i = 0; i < count; i++) {
cout << dC[i];
cout << ",";
}
cout << "}";
}
我是在 GPU 还是 CPU 上进行此计算。我对此表示怀疑。
其次,我在 main 中这样调用这个函数:
vectorSum(dA,dB,numOfData,dC);
但说 dC 在其值设置之前使用。为什么?在计算之前我必须设置什么。
整个代码:
using namespace std;
#include "cuda_runtime.h"
#include <thrust/host_vector.h>
#include <stdio.h>
#include <iostream>
#include <stdlib.h>
int *A;
int *B;
int rand_from_0_to_100_gen(void) {
return rand() % 100;
}
__host__ void generateVector(int count) {
A = new int[count];
B = new int[count];
for (int i = 0; i < count; i++) {
A[i] = rand_from_0_to_100_gen();
B[i] = rand_from_0_to_100_gen();
}
}
__host__ void displayVector(int count) {
generateVector(count);
cout << "A: {";
for (int i = 0; i < count; i++) {
cout << A[i];
cout << ",";
}
cout << "}";
cout << "\n";
cout << "B: {";
for (int i = 0; i < count; i++) {
cout << B[i];
cout << ",";
}
cout << "}";
}
__host__ void vectorSum(const int *dA, const int* dB, int count, int* dC){
cudaMalloc((void**) &dA, count * sizeof(int));
cudaMalloc((void**) &dB, count * sizeof(int));
cudaMemcpy(A, dA , count * sizeof(int), cudaMemcpyHostToDevice);
cudaMemcpy(B, dB , count * sizeof(int), cudaMemcpyHostToDevice);
int tid = 0;
while(tid < count){
tid++;
dC[tid] = dA[tid] + dB[tid];
}
cout << "C: {";
for (int i = 0; i < count; i++) {
cout << dC[i];
cout << ",";
}
cout << "}";
}
__host__ void vectorDiff(const int *dA, const int* dB, int count, int* dC){
}
int main(void) {
int dev, numOfData;
const int *dA;
const int *dB;
int *dC;
cudaGetDevice(&dev);
cout << "Device with ID " << dev << " is defined\n";
cout << "Please enter the number of data:";
cin >> numOfData;
displayVector(numOfData);
vectorSum(dA,dB,numOfData,dC);
return 0;
}