我在测量程序的执行时间时遇到了一点麻烦。我已经尝试过使用 CUDA 事件、time.h 库和 cutTimer,但问题是程序只打印到单位矩阵为止,其余部分(大矩阵、内核调用和结果向量)都没有打印出来。如果我删除 `t1 = clock()` 和 `t2 = clock()` 这两行,程序就能正常打印。我不知道是我哪里做错了,还是代码本身写错了。
# include <conio.h>
# include <time.h>
# include <cstdlib>
# include <iostream>
# include <vector>
using namespace std;
# define N 7
// Matrix-vector product with every partial sum reduced modulo 2.
//
// Each thread computes one output element:
//     SOL[i] = (sum over j of MAT[i * b + j] * VEC[j]) mod 2
//
// Parameters:
//   MAT - b x b matrix stored row-major with a row stride of exactly b ints.
//   VEC - input vector of b ints.
//   SOL - output vector of b ints; it is overwritten, so it does not need to
//         be initialised by the caller.
//   b   - dimension of the matrix and of both vectors.
//
// Launch layout: 1-D grid of 1-D blocks with at least b threads in total.
// Surplus threads are masked off by the bounds check, so any block size works.
__global__ void mult(int *MAT, int *VEC, int *SOL, int b) {
    // Global row index; uses the real block size instead of assuming 32.
    const int i = blockIdx.x * blockDim.x + threadIdx.x;

    // Threads beyond the last row must not touch memory.
    if (i >= b) {
        return;
    }

    // Accumulate in a register starting from zero: the previous content of
    // SOL[i] is not part of the result (device allocations are not zeroed).
    int acc = 0;
    for (int j = 0; j < b; j++) {
        acc = ((MAT[i * b + j] * VEC[j]) + acc) % 2;
    }
    SOL[i] = acc;
}
// Prints a diagnostic for a failed CUDA runtime call.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << endl;
    return false;
}

// Program flow:
//   1. Read k and build the k x k matrix Q from an XOR recurrence.
//   2. Invert Q modulo 2 by Gauss-Jordan elimination on [Q | I].
//   3. Derive "Q Hat" and an identity matrix, and assemble them together with
//      the inverse into a b x b "big matrix" (b = 2k + 1).
//   4. Read a vector of b ints, multiply big matrix * vector modulo 2 on the
//      GPU with the `mult` kernel, and print the result and the elapsed
//      clock() ticks.
//
// Returns 0 on success and 1 on invalid input or any CUDA failure.
int main () {
    // The 100 x 100 work areas are indexed up to column 2k, so k is capped.
    const int MAX_K = 49;

    int i, j, k, a, s, b;
    int Q1[100][100], Q[100][100], Qg[100][100], MI[100][100];
    int *MAT_dev = NULL, *VEC_dev = NULL, *SOL_dev = NULL;
    int coef, element;
    clock_t t1, t2;

    cout << "Size of the matrix: ";
    if (!(cin >> k) || k < 1 || k > MAX_K) {
        cerr << "Invalid size: expected an integer between 1 and " << MAX_K << "." << endl;
        return 1;
    }
    cout << endl << endl;
    a = k + 2;
    b = (k * 2) + 1;

    // Host buffers are sized from b (not from a compile-time constant) and the
    // matrix is stored flat with row stride b, which is the layout the kernel
    // indexes with MAT[i * b + j].
    vector<int> MAT(static_cast<size_t>(b) * b, 0);
    vector<int> VEC(b, 0);
    vector<int> SOL(b, 0);
    const size_t matBytes = MAT.size() * sizeof(int);
    const size_t vecBytes = VEC.size() * sizeof(int);

    //-----------------------------------------
    //----- MATRIX
    //-----------------------------------------
    // Matrix Q1: zero-filled (k + 2) x (k + 2) work area.
    for (i = 0; i < a; i++) {
        for (j = 0; j < a; j++) {
            Q1[i][j] = 0;
        }
    }
    // Matrix Q1 XOR: each entry is the XOR of its upper-left and upper-right
    // neighbours in the previous row.
    Q1[0][1] = 1;
    for (i = 0; i < k; i++) {
        for (j = 0; j < k; j++) {
            Q1[i + 1][j + 1] = Q1[i][j] ^ Q1[i][j + 2];
        }
    }
    // Q1 to Q: drop the first column of Q1.
    for (i = 0; i < k; i++) {
        for (j = 0; j < k; j++) {
            Q[i][j] = Q1[i][j + 1];
        }
    }
    // Matrix Inverse Q: append the identity to the right, giving [Q | I].
    for (i = 0; i < k; i++) {
        for (j = k; j < 2 * k; j++) {
            Q[i][j] = (i == (j - k)) ? 1 : 0;
        }
    }
    // Iterations: Gauss-Jordan elimination with entries reduced modulo 2.
    for (s = 0; s < k; s++) {
        element = Q[s][s];
        if (element == 0) {
            // Guard the division below; no row swapping is attempted.
            cerr << "Zero pivot at row " << s << ": matrix Q cannot be inverted." << endl;
            return 1;
        }
        for (j = 0; j < 2 * k; j++) {
            Q[s][j] = Q[s][j] / element;
        }
        for (i = 0; i < k; i++) {
            if (i == s) {
                continue;
            }
            // coef must be captured first: Q[i][s] is overwritten at j == s.
            coef = Q[i][s];
            for (j = 0; j < 2 * k; j++) {
                Q[i][j] = abs(Q[i][j] - coef * Q[s][j]) % 2;
            }
        }
    }
    // Print Matrix Q Inverse (right half of the augmented matrix).
    cout << endl << endl;
    cout << "Inverse of Q.\n\n";
    for (i = 0; i < k; i++) {
        for (j = k; j < k * 2; j++) {
            cout << Q[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl << endl;
    // Matrix Q Hat: the inverse shifted up by one row, with the last row and
    // last column zeroed except for a 1 in the bottom-right corner. The
    // border cells are set directly so no uninitialised Q entry is read.
    cout << "Q Hat. \n\n";
    for (i = 0; i < k; i++) {
        for (j = 0; j < k + 1; j++) {
            if (i == (k - 1) && j == k)
                Qg[i][j] = 1;
            else if (i == (k - 1) || j == k)
                Qg[i][j] = 0;
            else
                Qg[i][j] = Q[i + 1][j + k];
            cout << Qg[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl << endl;
    // Matrix Identity of size (k + 1) x (k + 1).
    cout << "Matrix Identity.\n\n";
    for (i = 0; i < k + 1; i++) {
        for (j = 0; j < k + 1; j++) {
            MI[i][j] = (i == j) ? 1 : 0;
            cout << MI[i][j] << " ";
        }
        cout << endl;
    }
    cout << endl << endl;

    //-----------------------------------------
    //----- Big Matrix
    //-----------------------------------------
    // MAT was created zero-filled, so only the three sub-blocks are written.
    // Big Matrix - Matrix Inverse: top-left k x k block.
    for (i = 0; i < k; i++) {
        for (j = 0; j < k; j++) {
            MAT[i * b + j] = Q[i][j + k];
        }
    }
    // Big Matrix - Matrix Hat: top-right k x (k + 1) block.
    for (i = 0; i < k; i++) {
        for (j = 0; j < k + 1; j++) {
            MAT[i * b + (j + k)] = Qg[i][j];
        }
    }
    // Big Matrix - Matrix Identity: bottom-left (k + 1) x (k + 1) block.
    // Only k + 1 rows exist in MI and only rows k..2k exist in MAT, so the
    // row loop must stop at k + 1.
    for (i = 0; i < k + 1; i++) {
        for (j = 0; j < (k + 1); j++) {
            MAT[(i + k) * b + j] = MI[i][j];
        }
    }
    // Print Big Matrix
    cout << "Big Matrix. \n\n";
    for (i = 0; i < b; i++) {
        for (j = 0; j < b; j++) {
            cout << MAT[i * b + j] << " ";
        }
        cout << endl;
    }
    cout << endl << endl;

    //-----------------------------------------
    //----- VECTOR
    //-----------------------------------------
    cout << "Vector: " << endl;
    for (i = 0; i < b; i++) {
        if (!(cin >> VEC[i])) {
            cerr << "Invalid vector element at position " << i << "." << endl;
            return 1;
        }
    }
    cout << endl << endl;

    //-----------------------------------------
    //----- Assign and Invocation
    //-----------------------------------------
    // The matrix needs b * b ints, the vectors only b ints each.
    bool ok = cudaOk(cudaMalloc((void**)&MAT_dev, matBytes), "cudaMalloc MAT_dev")
           && cudaOk(cudaMalloc((void**)&VEC_dev, vecBytes), "cudaMalloc VEC_dev")
           && cudaOk(cudaMalloc((void**)&SOL_dev, vecBytes), "cudaMalloc SOL_dev");

    // clock() ticks; divide by CLOCKS_PER_SEC to obtain seconds.
    t1 = clock();
    ok = ok
      && cudaOk(cudaMemcpy(MAT_dev, &MAT[0], matBytes, cudaMemcpyHostToDevice), "copy MAT to device")
      && cudaOk(cudaMemcpy(VEC_dev, &VEC[0], vecBytes, cudaMemcpyHostToDevice), "copy VEC to device")
      // The kernel may accumulate onto SOL, so it has to start from zero.
      && cudaOk(cudaMemset(SOL_dev, 0, vecBytes), "clear SOL_dev");
    if (ok) {
        // One block of b threads; b <= 2 * MAX_K + 1, well under the
        // per-block thread limit.
        mult<<< 1, b >>>(MAT_dev, VEC_dev, SOL_dev, b);
        ok = cudaOk(cudaGetLastError(), "mult launch")
          && cudaOk(cudaDeviceSynchronize(), "mult execution")
          && cudaOk(cudaMemcpy(&SOL[0], SOL_dev, vecBytes, cudaMemcpyDeviceToHost), "copy SOL to host");
    }
    if (ok) {
        for (i = 0; i < b; i++) {
            cout << SOL[i] << " ";
        }
        cout << endl;
        // Carry the result over into the input vector, as the original flow did.
        VEC = SOL;
    }

    //-----------------------------------------
    //----- Free Memory
    //-----------------------------------------
    // cudaFree accepts null pointers, so this is safe after a partial failure.
    cudaFree(MAT_dev);
    cudaFree(VEC_dev);
    cudaFree(SOL_dev);
    t2 = clock();

    if (!ok) {
        return 1;
    }
    cout << "Time of Execution: " << t2 - t1;
    cout << endl;
    system("PAUSE");
    return 0;
}
谢谢您的帮助。