0

我正在尝试在内mpz_t联 X86 程序集中实现 GMP 对象的多精度乘法。根据我对输出变量的约束选择,我要么得到分段错误,要么输出变量中的值以不一致的方式损坏(即不同的代码运行导致值以不同的方式损坏)。

这段代码所做的是采用两个 GMPmpz_t对象,ain并且bin,每个对象都保证大小为 13(即_mp_size设置为 13,对象由 13、64 位数字定义),然后生成mpz_t大小为 26 的对象 res,即是相乘ainbin相加的结果。我不使用的原因mpz_mul是因为此方法通常会导致此特定设置的性能提高。

请注意res->_mp_d, ain->_mp_dbin->_mp_d参考定义各个mpz_t对象的“肢体”数组,其中最(obj->_mp_d)[0]不重要(obj->_mp_d)[obj->_mp_size-1]的肢体是最重要的肢体。

如果有人可以帮助解释我在这里做错了什么,我将不胜感激!下面是一段代码。我已经排除了大部分程序集,因为它是重复的,但我认为我给出了足够的信息来很好地说明正在发生的事情:

void mpz_mul_x86_1(mpz_t res, mpz_t ain, mpz_t bin){

   if( res->_mp_alloc<26) //the next few lines makes sure res is large enough
     _mpz_realloc(res,26); //the result of the multiplication

   res->_mp_size = 26;


   asm volatile (            
     "movq 0(%1), %%rax;" 
     "mulq 0(%2);"
     "movq %%rax, 0(%0);"    
     "movq %%rdx, %%r8;"           //A0*B0
                                   //0

     "xorq %%r10, %%r10;" 

     "movq 8(%1), %%rax;"      
     "mulq 0(%2);"              
     "addq %%rax, %%r8;"     
     "movq %%rdx, %%r9;"  
     "adcq $0, %%r9;"              //A1*B0

     "movq 0(%1), %%rax;"  
     "mulq 8(%2);"         
     "addq %%rax, %%r8;" 
     "movq %%r8, 8(%0);"  
     "adcq %%rdx,%%r9;"    
     "adcq $0, %%r10;"                //A0*B1
                                     //1

     "xorq %%r8, %%r8;" 

     "movq 0(%1), %%rax;"
     "mulq 16(%2);"            
     "addq %%rax, %%r9;"            
     "adcq %%rdx, %%r10;"
     "adcq $0, %%r8;"           //A0*B2

     "movq 8(%1), %%rax;"
     "mulq 8(%2);"            
     "addq %%rax, %%r9;"            
     "adcq %%rdx, %%r10;"
     "adcq $0, %%r8;"        //A1*B1

     "movq 16(%1), %%rax;"
     "mulq 0(%2);"            
     "addq %%rax, %%r9;"    
     "movq %%r9, 16(%0);" 
     "adcq %%rdx, %%r10;"
     "adcq $0, %%r8;"            //A2*B0
                                 //2
     "xorq %%r9, %%r9;"  

     "movq 24(%1), %%rax;"
     "mulq 0(%2);"            
     "addq %%rax, %%r10;"            
     "adcq %%rdx, %%r8;"
     "adcq $0, %%r9;"              //A3*B0

     "movq 0(%1), %%rax;"
     "mulq 24(%2);"            
     "addq %%rax, %%r10;"            
     "adcq %%rdx, %%r8;"
     "adcq $0, %%r9;"            //A0*B3

     "movq 16(%1), %%rax;"
     "mulq 8(%2);"            
     "addq %%rax, %%r10;"            
     "adcq %%rdx, %%r8;"
     "adcq $0, %%r9;"        //A2*B1

     "movq 8(%1), %%rax;"
     "mulq 16(%2);"            
     "addq %%rax, %%r10;"   
     "movq %%r10, 24(%0);" 
     "adcq %%rdx, %%r8;"
     "adcq $0, %%r9;"        //A1*B2
                             //3


    /*About 1000 lines of omitted Assembly code is from here*/


     "xor %%r8, %%r8;"

     "movq 96(%1), %%rax;"
     "mulq 88(%2);"            
     "addq %%rax, %%r9;"
     "adcq %%rdx, %%r10;"
     "adcq $0, %%r8;"    //A12*B11

     "movq 88(%1), %%rax;"
     "mulq 96(%2);"            
     "addq %%rax, %%r9;"
     "movq %%r9, 184(%0);"
     "adcq %%rdx, %%r10;"
     "adcq $0, %%r8;"    //A11*B12
                         //23
     "xor %%r9, %%r9;"

     "movq 96(%1), %%rax;"
     "mulq 96(%2);"            
     "addq %%rax, %%r10;"
     "movq %%r10, 192(%0);"
     "adcq %%rdx, %%r8;"
     "adcq $0, %%r8;"    //A12*B12
                         //24

     "movq %%r8, 200(%0);" //25


     :  "=&r" (res->_mp_d) 
     : "r" ((ain->_mp_d)), "r" ((bin->_mp_d))
     : "%rax", "%rdx", "%r8", "%r9", "%r10", "memory", "cc"
     );
}
4

1 回答 1

1

您错误地声明 res->_mp_d 是 asm 语句的输出,而实际上它是指向输出的指针的输入。

于 2013-08-22T19:39:53.157 回答