如果数组大小在编译时已知，可以这样做：
#include <inttypes.h>
#include <malloc.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
// Two-step stringification: str() turns its argument into a string literal
// exactly as written, while xstr() lets the argument be macro-expanded first,
// so xstr(ARRAYSIZE) produces "4" rather than "ARRAYSIZE".
#define str(s) #s
#define xstr(s) str(s)
// Number of 64-bit elements in each array. The value is pasted as text into
// the AddArray invocation in main() via xstr(), where the assembler evaluates
// expressions such as (ARRAYSIZE-1) on it.
#define ARRAYSIZE 4
// GNU assembler macro: AddArray2 p1, p2, from, to
//
// Recursive helper that emits one add-with-carry step per 64-bit element for
// the element indices from..to (inclusive):
//     movq from*8(p2), %rax
//     adcq %rax, from*8(p1)
// and then re-invokes itself with from+1 as long as (to - from) is non-zero.
//
// p1 and p2 are substituted textually as the base of a memory operand.
// adcq consumes the carry flag left by the previous add/adc; the intervening
// movq does not modify flags, so the carry chain is preserved between steps.
// The "(\from+1)" argument is quoted so the whole expression is passed as a
// single macro argument; it is substituted as text, so the expression grows
// with every level of recursion.
// Overwrites %rax.
asm(".macro AddArray2 p1, p2, from, to\n\t"
"movq (\\from*8)(\\p2), %rax\n\t"
"adcq %rax, (\\from*8)(\\p1)\n\t"
".if \\to-\\from\n\t"
" AddArray2 \\p1, \\p2, \"(\\from+1)\", \\to\n\t"
".endif\n\t"
".endm\n");
// GNU assembler macro: AddArray p1, p2, p3
//
// Emits fully unrolled code that adds the p3-element array of 64-bit words at
// (p2) into the array at (p1), propagating the carry from element 0 upwards.
// Element 0 is handled here with a plain addq (no carry-in); when p3-1 is
// non-zero the remaining elements 1..p3-1 are emitted by AddArray2 using adcq.
// After the expansion the carry flag holds the carry out of the last element.
// p3 is evaluated by the assembler, so it has to be an assemble-time constant.
// Overwrites %rax.
asm(".macro AddArray p1, p2, p3\n\t"
"movq (\\p2), %rax\n\t"
"addq %rax, (\\p1)\n\t"
".if \\p3-1\n\t"
" AddArray2 \\p1, \\p2, 1, (\\p3-1)\n\t"
".endif\n\t"
".endm");
// Prints `label` followed by `count` 64-bit values in hexadecimal, each value
// followed by a single space. No trailing newline is written.
static void print_qwords(const char *label, const uint64_t *values, size_t count)
{
    printf("%s", label);
    for (size_t i = 0; i < count; i++) {
        // PRIx64 is the portable conversion for uint64_t; "%I64x" is only
        // understood by the Microsoft C runtime.
        printf("%" PRIx64 " ", values[i]);
    }
}

// Demonstrates multi-word addition with the AddArray assembler macro:
// fills anum with all-ones, sets bnum to the value 1, computes anum += bnum
// across ARRAYSIZE 64-bit words and prints both operands, the result and the
// final carry. Returns 0 on success, EXIT_FAILURE if allocation fails.
int main(void)
{
    unsigned char carry;

    // ARRAYSIZE must be at least 1: AddArray always emits the add for
    // element 0 and only stops recursing when it reaches index ARRAYSIZE-1.
    const size_t nbytes = ARRAYSIZE * sizeof(uint64_t);

    // Create the arrays
    uint64_t *anum = malloc(nbytes);
    uint64_t *bnum = malloc(nbytes);
    if (anum == NULL || bnum == NULL) {
        fprintf(stderr, "out of memory\n");
        free(anum);   // free(NULL) is a no-op, so this is safe for either case
        free(bnum);
        return EXIT_FAILURE;
    }

    // Put some data in: anum = all bits set, bnum = 1 (least significant word first)
    memset(anum, 0xff, nbytes);
    memset(bnum, 0, nbytes);
    bnum[0] = 1;

    // Print the arrays before the add
    print_qwords("anum: ", anum, ARRAYSIZE);
    print_qwords("\nbnum: ", bnum, ARRAYSIZE);
    printf("\n");

    // Add the arrays. The macro expands to an add/adc chain on the memory the
    // two pointers refer to, hence the "memory" clobber; it uses %rax as a
    // scratch register and changes the flags. "=q" requests a register with a
    // byte-addressable low part for setc.
    asm ("AddArray %[anum], %[bnum], " xstr(ARRAYSIZE) "\n\t"
         "setc %[carry]" // Get the flags from the final add
         : [carry] "=q"(carry)
         : [anum] "r" (anum), [bnum] "r" (bnum)
         : "rax", "cc", "memory"
         );

    // Print the result
    print_qwords("Result: ", anum, ARRAYSIZE);
    printf(": %d\n", carry);

    free(anum);
    free(bnum);
    return 0;
}
这会生成如下代码：
mov (%rsi),%rax
add %rax,(%rbx)
mov 0x8(%rsi),%rax
adc %rax,0x8(%rbx)
mov 0x10(%rsi),%rax
adc %rax,0x10(%rbx)
mov 0x18(%rsi),%rax
adc %rax,0x18(%rbx)
setb %bpl
由于给每一位都是 f 的数加 1 会使每个元素依次溢出并向上进位，因此上面代码的输出为：
anum: ffffffffffffffff ffffffffffffffff ffffffffffffffff ffffffffffffffff
bnum: 1 0 0 0
Result: 0 0 0 0 : 1
按照目前的写法，ARRAYSIZE 最多只能取到大约 100 个元素（受 GNU 汇编器宏嵌套深度的限制）。这应该够用了……