我正在尝试在内mpz_t
联 X86 程序集中实现 GMP 对象的多精度乘法。根据我对输出变量的约束选择,我要么得到分段错误,要么输出变量中的值以不一致的方式损坏(即不同的代码运行导致值以不同的方式损坏)。
这段代码所做的是采用两个 GMPmpz_t
对象,ain
并且bin
,每个对象都保证大小为 13(即_mp_size
设置为 13,对象由 13、64 位数字定义),然后生成mpz_t
大小为 26 的对象 res,即是相乘ain
和bin
相加的结果。我不使用的原因mpz_mul
是因为此方法通常会导致此特定设置的性能提高。
请注意res->_mp_d, ain->_mp_d
并bin->_mp_d
参考定义各个mpz_t
对象的“肢体”数组,其中最(obj->_mp_d)[0]
不重要(obj->_mp_d)[obj->_mp_size-1]
的肢体是最重要的肢体。
如果有人可以帮助解释我在这里做错了什么,我将不胜感激!下面是一段代码。我已经排除了大部分程序集,因为它是重复的,但我认为我给出了足够的信息来很好地说明正在发生的事情:
void mpz_mul_x86_1(mpz_t res, mpz_t ain, mpz_t bin){
if( res->_mp_alloc<26) //the next few lines makes sure res is large enough
_mpz_realloc(res,26); //the result of the multiplication
res->_mp_size = 26;
asm volatile (
"movq 0(%1), %%rax;"
"mulq 0(%2);"
"movq %%rax, 0(%0);"
"movq %%rdx, %%r8;" //A0*B0
//0
"xorq %%r10, %%r10;"
"movq 8(%1), %%rax;"
"mulq 0(%2);"
"addq %%rax, %%r8;"
"movq %%rdx, %%r9;"
"adcq $0, %%r9;" //A1*B0
"movq 0(%1), %%rax;"
"mulq 8(%2);"
"addq %%rax, %%r8;"
"movq %%r8, 8(%0);"
"adcq %%rdx,%%r9;"
"adcq $0, %%r10;" //A0*B1
//1
"xorq %%r8, %%r8;"
"movq 0(%1), %%rax;"
"mulq 16(%2);"
"addq %%rax, %%r9;"
"adcq %%rdx, %%r10;"
"adcq $0, %%r8;" //A0*B2
"movq 8(%1), %%rax;"
"mulq 8(%2);"
"addq %%rax, %%r9;"
"adcq %%rdx, %%r10;"
"adcq $0, %%r8;" //A1*B1
"movq 16(%1), %%rax;"
"mulq 0(%2);"
"addq %%rax, %%r9;"
"movq %%r9, 16(%0);"
"adcq %%rdx, %%r10;"
"adcq $0, %%r8;" //A2*B0
//2
"xorq %%r9, %%r9;"
"movq 24(%1), %%rax;"
"mulq 0(%2);"
"addq %%rax, %%r10;"
"adcq %%rdx, %%r8;"
"adcq $0, %%r9;" //A3*B0
"movq 0(%1), %%rax;"
"mulq 24(%2);"
"addq %%rax, %%r10;"
"adcq %%rdx, %%r8;"
"adcq $0, %%r9;" //A0*B3
"movq 16(%1), %%rax;"
"mulq 8(%2);"
"addq %%rax, %%r10;"
"adcq %%rdx, %%r8;"
"adcq $0, %%r9;" //A2*B1
"movq 8(%1), %%rax;"
"mulq 16(%2);"
"addq %%rax, %%r10;"
"movq %%r10, 24(%0);"
"adcq %%rdx, %%r8;"
"adcq $0, %%r9;" //A1*B2
//3
/*About 1000 lines of omitted Assembly code is from here*/
"xor %%r8, %%r8;"
"movq 96(%1), %%rax;"
"mulq 88(%2);"
"addq %%rax, %%r9;"
"adcq %%rdx, %%r10;"
"adcq $0, %%r8;" //A12*B11
"movq 88(%1), %%rax;"
"mulq 96(%2);"
"addq %%rax, %%r9;"
"movq %%r9, 184(%0);"
"adcq %%rdx, %%r10;"
"adcq $0, %%r8;" //A11*B12
//23
"xor %%r9, %%r9;"
"movq 96(%1), %%rax;"
"mulq 96(%2);"
"addq %%rax, %%r10;"
"movq %%r10, 192(%0);"
"adcq %%rdx, %%r8;"
"adcq $0, %%r8;" //A12*B12
//24
"movq %%r8, 200(%0);" //25
: "=&r" (res->_mp_d)
: "r" ((ain->_mp_d)), "r" ((bin->_mp_d))
: "%rax", "%rdx", "%r8", "%r9", "%r10", "memory", "cc"
);
}