您可以比较编译器为这些基本函数生成的汇编代码(我在 OS X 10.7.4 上使用了 GCC 4.8.1):
// 3D point that stores its three float components in one array.
struct Point3D {
// The three components, accessed by index 0..2.
float m_data[3];
};
// 3D point that stores its three float components as separately named members.
struct Point3Ds {
// First component.
float x;
// Second component.
float y;
// Third component.
float z;
};
// Dot product of two array-backed points.
// The products and their sum are evaluated in float; only the final value is
// widened to the double return type.
double dot(const Point3D& p1, const Point3D& p2) {
    // Marker comment that shows up in the generated assembly listing.
    asm("# Dot - Point3D");
    const float (&a)[3] = p1.m_data;
    const float (&b)[3] = p2.m_data;
    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
}
// Dot product of two points with named x/y/z members.
// The products and their sum are evaluated in float; only the final value is
// widened to the double return type.
double dot(const Point3Ds& p1, const Point3Ds& p2) {
    // Marker comment that shows up in the generated assembly listing.
    asm("# Dot - Point3Ds");
    const float ax = p1.x;
    const float ay = p1.y;
    const float az = p1.z;
    const float bx = p2.x;
    const float by = p2.y;
    const float bz = p2.z;
    return ax * bx + ay * by + az * bz;
}
// Cross product p1 x p2 of two array-backed points, returned by value.
Point3D cross(const Point3D& p1, const Point3D& p2) {
    // Marker comment that shows up in the generated assembly listing.
    asm("# Cross - Point3D");
    const float (&a)[3] = p1.m_data;
    const float (&b)[3] = p2.m_data;
    Point3D result;
    result.m_data[0] = a[1] * b[2] - a[2] * b[1];
    result.m_data[1] = a[2] * b[0] - a[0] * b[2];
    result.m_data[2] = a[0] * b[1] - a[1] * b[0];
    return result;
}
// Cross product p1 x p2 of two points with named x/y/z members.
// NOTE(review): the result is the array-backed Point3D, not Point3Ds --
// presumably so both cross() overloads share a return type; confirm.
Point3D cross(const Point3Ds& p1, const Point3Ds& p2) {
    // Marker comment that shows up in the generated assembly listing.
    asm("# Cross - Point3Ds");
    const float cx = p1.y * p2.z - p1.z * p2.y;
    const float cy = p1.z * p2.x - p1.x * p2.z;
    const float cz = p1.x * p2.y - p1.y * p2.x;
    return Point3D{{cx, cy, cz}};
}
使用 g++ -O3 -S 进行编译时,我得到以下汇编代码(相关部分):
# 12 "point3f.cpp" 1
# Dot - Point3D
# 0 "" 2
movss (%rdi), %xmm0
movss 4(%rdi), %xmm1
mulss (%rsi), %xmm0
mulss 4(%rsi), %xmm1
addss %xmm1, %xmm0
movss 8(%rdi), %xmm1
mulss 8(%rsi), %xmm1
addss %xmm1, %xmm0
unpcklps %xmm0, %xmm0
cvtps2pd %xmm0, %xmm0
ret
LFE0:
.align 4,0x90
.globl __Z3dotRK8Point3DsS1_
__Z3dotRK8Point3DsS1_:
LFB1:
# 19 "point3f.cpp" 1
# Dot - Point3Ds
# 0 "" 2
movss (%rdi), %xmm0
movss 4(%rdi), %xmm1
mulss (%rsi), %xmm0
mulss 4(%rsi), %xmm1
addss %xmm1, %xmm0
movss 8(%rdi), %xmm1
mulss 8(%rsi), %xmm1
addss %xmm1, %xmm0
unpcklps %xmm0, %xmm0
cvtps2pd %xmm0, %xmm0
ret
LFE1:
.align 4,0x90
.globl __Z5crossRK7Point3DS1_
__Z5crossRK7Point3DS1_:
LFB2:
# 26 "point3f.cpp" 1
# Cross - Point3D
# 0 "" 2
movss 4(%rdi), %xmm3
movss 8(%rdi), %xmm1
movss 8(%rsi), %xmm5
movaps %xmm3, %xmm2
movss 4(%rsi), %xmm4
movaps %xmm1, %xmm0
mulss %xmm5, %xmm2
mulss %xmm4, %xmm0
subss %xmm0, %xmm2
movss (%rdi), %xmm0
mulss %xmm0, %xmm5
movss %xmm2, -24(%rsp)
movss (%rsi), %xmm2
mulss %xmm4, %xmm0
mulss %xmm2, %xmm1
mulss %xmm3, %xmm2
subss %xmm5, %xmm1
subss %xmm2, %xmm0
movss %xmm1, -20(%rsp)
movss %xmm0, -16(%rsp)
movq -24(%rsp), %xmm0
movd -16(%rsp), %xmm1
ret
LFE2:
.align 4,0x90
.globl __Z5crossRK8Point3DsS1_
__Z5crossRK8Point3DsS1_:
LFB3:
# 33 "point3f.cpp" 1
# Cross - Point3Ds
# 0 "" 2
movss 4(%rdi), %xmm3
movss 8(%rdi), %xmm1
movss 8(%rsi), %xmm5
movaps %xmm3, %xmm2
movss 4(%rsi), %xmm4
movaps %xmm1, %xmm0
mulss %xmm5, %xmm2
mulss %xmm4, %xmm0
subss %xmm0, %xmm2
movss (%rdi), %xmm0
mulss %xmm0, %xmm5
movss %xmm2, -24(%rsp)
movss (%rsi), %xmm2
mulss %xmm4, %xmm0
mulss %xmm2, %xmm1
mulss %xmm3, %xmm2
subss %xmm5, %xmm1
subss %xmm2, %xmm0
movss %xmm1, -20(%rsp)
movss %xmm0, -16(%rsp)
movq -24(%rsp), %xmm0
movd -16(%rsp), %xmm1
ret
所以生成的汇编代码是相同的。但我同意将其存储为定长数组(即 float m_data[3])会更实用,因为这样可以两全其美:在需要时只传递一个参数(指向数组的指针),同时又能通过 getter 以更高层、更惯用的方式访问 x、y、z。从这个意义上说,我相信我会以类似于以下的方式实现这样的类:
// 3D point that keeps its components in one contiguous float array while
// exposing them through named accessors.
class MyPoint3S {
public:
    // Stores x, y and z at indices 0, 1 and 2 of the internal array.
    MyPoint3S(float x, float y, float z)
        : m_data{x, y, z} { }

    // The following getters are defined in-class, so they will be inlined.
    float x() const {
        return m_data[0];
    }
    float y() const {
        return m_data[1];
    }
    float z() const {
        return m_data[2];
    }

    // Mutable pointer to the three contiguous components, for APIs that take
    // a float array. Some would advise against offering a hook to a private
    // member.
    float* data() {
        return m_data;
    }

    // Read-only counterpart of data(), so that a const point can also be
    // handed to pointer-taking APIs without a const_cast.
    const float* data() const {
        return m_data;
    }

private:
    float m_data[3];
};
并像这样使用它:
// Build a point and print its three components through the getters.
MyPoint3S p(1.0f, 2.0f, 3.0f);
std::cout << "p = "
          << p.x() << ", "
          << p.y() << ", "
          << p.z() << std::endl;
获得:
p = 1, 2, 3
或者以您喜欢的任何方式调用 OpenGL 函数:
// Hand all three components to OpenGL at once via the pointer returned by data().
glVertex3fv(p.data());
或者
// Pass the vertex component by component through the x()/y()/z() getters.
glVertex3f(p.x(), p.y(), p.z());