使用 gcc7 或 clang 编译用到 Intel AVX 扩展中 256 位向量数据类型(__m256d)的程序时失败。不加 -mavx 标志时,我可以正常编译和使用 128 位向量。但是,一旦我尝试使用 AVX 向量,要么汇编器报告无法识别某些指令(GCC7),要么出现链接器错误(clang)。所有编译器和软件包都是我通过 MacPorts(port 命令)安装的。
该程序:
#include <cmath>
#include <iostream>

#include <emmintrin.h>
#include <immintrin.h>
#include <pmmintrin.h>
#include <xmmintrin.h>
/// Two complex numbers (real/imaginary pairs of doubles) overlaid with a
/// single 256-bit vector value.
///
/// The anonymous union makes the four scalar fields and the vector member `v`
/// share the same storage, so the data can be addressed either field by field
/// or as one __m256d.
struct cpx256 {
    union {
        // Scalar view: real and imaginary parts of the first and second number.
        struct {
            double r1;
            double i1;
            double r2;
            double i2;
        };
        // Vector view over the same storage.
        __m256d v;
    };

    /// Stores the four components through the scalar view.
    ///
    /// @param r1_in  real part of the first number
    /// @param i1_in  imaginary part of the first number
    /// @param r2_in  real part of the second number
    /// @param i2_in  imaginary part of the second number
    cpx256(double r1_in, double i1_in, double r2_in, double i2_in)
        : r1{r1_in}, i1{i1_in}, r2{r2_in}, i2{i2_in} {}
};
std::ostream & operator<< (std::ostream & os, cpx256 cmplx) {
os << cmplx.r1;
(cmplx.i1 > 0) ? (os << "+" << cmplx.i1 << "j") : (os << cmplx.i1 << "j");
os << " | ";
os << cmplx.r2;
(cmplx.i2 > 0) ? (os << "+" << cmplx.i2 << "j") : (os << cmplx.i2 << "j");
return os;
}
int main(){
cpx256 b = cpx256(1,2, 3, 4);
std::cout << b << std::endl;
return 0;
}
使用 gcc7:
g++ -std=c++11 -Wall -Wextra -mtune=native -ffast-math -fverbose-asm -mavx avx.cpp
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:57:no such instruction: `vmovsd %xmm0, -16(%rbp)'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:58:no such instruction: `vmovsd %xmm1, -24(%rbp)'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:59:no such instruction: `vmovsd %xmm2, -32(%rbp)'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:60:no such instruction: `vmovsd %xmm3, -40(%rbp)'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:63:no such instruction: `vmovsd -16(%rbp), %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:64:no such instruction: `vmovsd %xmm0, (%rax)'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:66:no such instruction: `vmovsd -24(%rbp), %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:67:no such instruction: `vmovsd %xmm0, 8(%rax)'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:69:no such instruction: `vmovsd -32(%rbp), %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:70:no such instruction: `vmovsd %xmm0, 16(%rax)'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:72:no such instruction: `vmovsd -40(%rbp), %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:73:no such instruction: `vmovsd %xmm0, 24(%rax)'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:99:no such instruction: `vmovd %rdx, %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:103:no such instruction: `vmovsd 24(%rbp), %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:104:no such instruction: `vxorpd %xmm1, %xmm1,%xmm1'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:105:no such instruction: `vcomisd %xmm1, %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:113:no such instruction: `vmovd %rax, %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:123:no such instruction: `vmovd %rdx, %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:138:no such instruction: `vmovd %rdx, %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:142:no such instruction: `vmovsd 40(%rbp), %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:143:no such instruction: `vxorpd %xmm1, %xmm1,%xmm1'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:144:no such instruction: `vcomisd %xmm1, %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:152:no such instruction: `vmovd %rax, %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:162:no such instruction: `vmovd %rdx, %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:190:no such instruction: `vmovsd lC4(%rip), %xmm2'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:191:no such instruction: `vmovsd lC5(%rip), %xmm1'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:192:no such instruction: `vmovsd lC6(%rip), %xmm0'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:195:no such instruction: `vmovapd %xmm2, %xmm3'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:196:no such instruction: `vmovapd %xmm1, %xmm2'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:197:no such instruction: `vmovapd %xmm0, %xmm1'
/var/folders/06/hdqdxlh14czcvkk4t55894y80000gn/T//ccXFPOnA.s:198:no such instruction: `vmovd %rdx, %xmm0'
使用 clang 时,我得到的是链接器错误:
clang -std=c++11 -Wall -Wextra -mtune=native -ffast-math -fverbose-asm -mavx avx.cpp
Undefined symbols for architecture x86_64:
"std::__1::__basic_string_common<true>::__throw_length_error() const", referenced from:
std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> > std::__1::__pad_and_output<char, std::__1::char_traits<char> >(std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> >, char const*, char const*, char const*, std::__1::ios_base&, char) in avx-dfd779.o
"std::__1::locale::use_facet(std::__1::locale::id&) const", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::endl<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&) in avx-dfd779.o
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"std::__1::ios_base::getloc() const", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::endl<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&) in avx-dfd779.o
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::~basic_string()", referenced from:
std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> > std::__1::__pad_and_output<char, std::__1::char_traits<char> >(std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> >, char const*, char const*, char const*, std::__1::ios_base&, char) in avx-dfd779.o
"std::__1::basic_ostream<char, std::__1::char_traits<char> >::put(char)", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::endl<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&) in avx-dfd779.o
"std::__1::basic_ostream<char, std::__1::char_traits<char> >::flush()", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::endl<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&) in avx-dfd779.o
"std::__1::basic_ostream<char, std::__1::char_traits<char> >::sentry::sentry(std::__1::basic_ostream<char, std::__1::char_traits<char> >&)", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"std::__1::basic_ostream<char, std::__1::char_traits<char> >::sentry::~sentry()", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"std::__1::basic_ostream<char, std::__1::char_traits<char> >::operator<<(double)", referenced from:
operator<<(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, cpx256) in avx-dfd779.o
"std::__1::cout", referenced from:
_main in avx-dfd779.o
"std::__1::ctype<char>::id", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::endl<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&) in avx-dfd779.o
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"std::__1::locale::~locale()", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::endl<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&) in avx-dfd779.o
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"std::__1::ios_base::__set_badbit_and_consider_rethrow()", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"std::__1::ios_base::clear(unsigned int)", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"std::terminate()", referenced from:
___clang_call_terminate in avx-dfd779.o
"operator new(unsigned long)", referenced from:
std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> > std::__1::__pad_and_output<char, std::__1::char_traits<char> >(std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> >, char const*, char const*, char const*, std::__1::ios_base&, char) in avx-dfd779.o
"___cxa_begin_catch", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
___clang_call_terminate in avx-dfd779.o
"___cxa_end_catch", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
"___gxx_personality_v0", referenced from:
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::endl<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&) in avx-dfd779.o
std::__1::basic_ostream<char, std::__1::char_traits<char> >& std::__1::__put_character_sequence<char, std::__1::char_traits<char> >(std::__1::basic_ostream<char, std::__1::char_traits<char> >&, char const*, unsigned long) in avx-dfd779.o
std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> > std::__1::__pad_and_output<char, std::__1::char_traits<char> >(std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> >, char const*, char const*, char const*, std::__1::ios_base&, char) in avx-dfd779.o
Dwarf Exception Unwind Info (__eh_frame) in avx-dfd779.o
ld: symbol(s) not found for architecture x86_64
clang: error: linker command failed with exit code 1 (use -v to see invocation)
clang:
clang version 4.0.1 (tags/RELEASE_401/final)
Target: x86_64-apple-darwin16.6.0
Thread model: posix
InstalledDir: /opt/local/libexec/llvm-4.0/bin
GCC7:
g++ (MacPorts gcc7 7-20170622_0) 7.1.1 20170622
我的笔记本电脑 CPU:2.8 GHz Intel Core i7(应该支持 AVX),操作系统:macOS Sierra 10.12.5
我还通过 MacPorts 更新了链接器(ld64-latest 274.2),并安装、激活了 mp-clang-4.0。有什么想法吗?