我正在尝试使用汇编语言对两个小数组执行简单的点积计算。这是我的代码:
#include <cstdio>
#include <cstdint>
#include <cstdlib>
void fillArray(int16_t* a, int16_t* b, int n){
std::srand(1);
int i = 0;
do
{
a[i] = rand() % 50;
b[i] = rand() % 10;
i++;
} while (i < n);
}
void printArray(int16_t* a, int16_t* b, int n){
int i = 0;
do
{
printf("a[%d]: %d; b[%d]: %d\n", i, a[i], i, b[i]);
i++;
} while (i < n);
}
//control operation
int16_t dotCpp(int16_t* a, int16_t* b, int n){
int16_t dotProd;
int i = 0;
do
{
dotProd += a[i] * b[i];
i++;
} while (i < n);
return dotProd;
}
extern "C" void dotAsm_(int16_t* a, int16_t* b, int16_t *dotProd);
//dotAsm_ file
section .data
section .text
global dotAsm_
dotAsm_:
push ebp
mov ebp, esp
mov eax, [ebp+8] ;load a
mov ebx, [ebp+12] ;load b
mov ecx, [ebp+16] ;load address of dotProd
movq mm0, [eax] ;move content of eax to mm0
movq mm1, [ebx] ;move content of ebx to mm1
movq mm2, mm0 ;copy mm0
pmaddwd mm2, mm1 ;multiply and add
movq mm3, mm2 ;copy mm2 to mm3
psrlq mm3, 32 ;shift mm3 by 32 bits to the right putting the higher-order bits into the lower-order bits
paddd mm2, mm3 ;add lower-order bits saving result in mm2
punpcklwd mm4, mm2 ;unpack the lower order bits
psrld mm4, 16 ;shift right by 16 bits, get the result of the addition
movq [ecx], mm4 ;move result back to the register
pop ebp
emms
ret
int main(int argc, char *argv[])
{
int n = 4;
int16_t sum = 0;
int16_t *dot;
int16_t a[n], b[n];
fillArray(a, b, n);
printArray(a, b, n);
sum = dotCpp(a, b, n);
printf("dotprod: %d\n", sum);
dotAsm_(a, b, dot);
// printf("ASM dotprod: %i\n", &dot);
return 0;
}
从makefile编译:
CXX = g++
CXXFLGS = -g -Wall -std=c++11
SRC = main.o innerProd.o
EXEC = innerProd
$(EXEC): $(SRC)
$(CXX) $(CXXFLGS) $(SRC) -o $(EXEC)
innerProd.o: innerProd.asm
nasm -f elf -F stabs innerProd.asm -o innerProd.o
main.o: innerProd.cpp
$(CXX) $(CXXFLGS) -c innerProd.cpp -o main.o
结果是:
a[0]: 33; b[0]: 6
a[1]: 27; b[1]: 5
a[2]: 43; b[2]: 5
a[3]: 36; b[3]: 2
dotprod: 620
Segmentation fault (core dumped)
使用gdb分析原因显示如下:
//having successfully performed the neccessary calculations:
mm4 {uint64 = 0x26c, v2_int32 = {0x26c, 0x0} ...}
//hence the correct result 0x26c = (dec) 620 has been obtained however loading it back
//into the register causes the segmentation fault.
(gdb) ni
Program received signal SIGSEGV, Segmentation fault.
0x08048709 in dotAsm_ ()
我不知道为什么我不能将结果移回寄存器。任何建议都受到高度赞赏。
提前多次感谢您。
文森特