I'm currently attempting to write an algorithm for optimizing matrix multiplication over GF(2) using bit-packing. Both matrices A
and B
are provided in column major order so I start by copying A
into row-major order and then packing the values into 8-bit integers and using parity checking to speed up operations. I need to be able to test square matrices of up to 2048x2048, however, my current implementation provides the correct answer up to 24x24 and then fails to compute the correct result. Any help would be appreciated.
//Method which packs an array of integers into 8 bits
uint8_t pack(int *toPack) {
int i;
uint8_t A;
A = 0;
for (i = 0; i < 8; i++) {
A = (A << 1) | (uint8_t)toPack[i];
}
return A;
}
//Method for doing matrix multiplication over GF(2)
void matmul_optimized(int n, int *A, int *B, int *C) {
int i, j, k;
//Copying values of A into a row major order matrix.
int *A_COPY = malloc(n * n * sizeof(int));
int copy_index = 0;
for (i = 0; i < n; i++) {
for (j = 0; j < n; j++) {
A_COPY[copy_index] = A[i + j * n];
copy_index++;
}
}
//Size of the data data type integers will be packed into
const int portion_size = 8;
int portions = n / portion_size;
//Pointer space reserved to store packed integers in row major order
uint8_t *compressedA = malloc(n * portions * sizeof(uint8_t));
uint8_t *compressedB = malloc(n * portions * sizeof(uint8_t));
int a[portion_size];
int b[portion_size];
for (i = 0; i < n; i++) {
for (j = 0; j < portions; j++) {
for (k = 0; k < portion_size; k++) {
a[k] = A_COPY[i * n + j * portion_size + k];
b[k] = B[i * n + j * portion_size + k];
}
compressedA[i * n + j] = pack(a);
compressedB[i * n + j] = pack(b);
}
}
//Calculating final matrix using parity checking and XOR on A and B
int cij;
for (i = 0; i < n; ++i) {
for (j = 0; j < n; ++j) {
int cIndex = i + j * n;
cij = C[cIndex];
for (k = 0; k < portions; ++k) {
uint8_t temp = compressedA[k + i * n] & compressedB[k + j * n];
temp ^= temp >> 4;
temp ^= temp >> 2;
temp ^= temp >> 1;
uint8_t parity = temp & (uint8_t)1;
cij = cij ^ parity;
}
C[cIndex] = cij;
}
}
free(compressedA);
free(compressedB);
free(A_COPY);
}