编辑:这里的代码仍然有一些错误,它可以在性能部门做得更好,但是我没有尝试修复这个问题,而是将问题提交给英特尔讨论组并得到了很多很好的反馈,如果一切顺利,Atomic float 的抛光版本将包含在英特尔线程构建模块的近期版本中
好的,这是一个艰难的,我想要一个原子浮点数,不是为了超快的图形性能,而是作为类的数据成员常规使用。而且我不想为在这些类上使用锁付出代价,因为它没有为我的需求提供额外的好处。
现在有了英特尔的 tbb 和我见过的其他原子库,支持整数类型,但不支持浮点。所以我继续实施了一个,它有效......但我不确定它是否真的有效,或者我很幸运它有效。
这里的任何人都知道这是否不是某种形式的线程异端?
typedef unsigned int uint_32;
struct AtomicFloat
{
private:
tbb::atomic<uint_32> atomic_value_;
public:
template<memory_semantics M>
float fetch_and_store( float value )
{
const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::fetch_and_store<M>((uint_32&)value);
return reinterpret_cast<const float&>(value_);
}
float fetch_and_store( float value )
{
const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::fetch_and_store((uint_32&)value);
return reinterpret_cast<const float&>(value_);
}
template<memory_semantics M>
float compare_and_swap( float value, float comparand )
{
const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::compare_and_swap<M>((uint_32&)value,(uint_32&)compare);
return reinterpret_cast<const float&>(value_);
}
float compare_and_swap(float value, float compare)
{
const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::compare_and_swap((uint_32&)value,(uint_32&)compare);
return reinterpret_cast<const float&>(value_);
}
operator float() const volatile // volatile qualifier here for backwards compatibility
{
const uint_32 value_ = atomic_value_;
return reinterpret_cast<const float&>(value_);
}
float operator=(float value)
{
const uint_32 value_ = atomic_value_.tbb::atomic<uint_32>::operator =((uint_32&)value);
return reinterpret_cast<const float&>(value_);
}
float operator+=(float value)
{
volatile float old_value_, new_value_;
do
{
old_value_ = reinterpret_cast<float&>(atomic_value_);
new_value_ = old_value_ + value;
} while(compare_and_swap(new_value_,old_value_) != old_value_);
return (new_value_);
}
float operator*=(float value)
{
volatile float old_value_, new_value_;
do
{
old_value_ = reinterpret_cast<float&>(atomic_value_);
new_value_ = old_value_ * value;
} while(compare_and_swap(new_value_,old_value_) != old_value_);
return (new_value_);
}
float operator/=(float value)
{
volatile float old_value_, new_value_;
do
{
old_value_ = reinterpret_cast<float&>(atomic_value_);
new_value_ = old_value_ / value;
} while(compare_and_swap(new_value_,old_value_) != old_value_);
return (new_value_);
}
float operator-=(float value)
{
return this->operator+=(-value);
}
float operator++()
{
return this->operator+=(1);
}
float operator--()
{
return this->operator+=(-1);
}
float fetch_and_add( float addend )
{
return this->operator+=(-addend);
}
float fetch_and_increment()
{
return this->operator+=(1);
}
float fetch_and_decrement()
{
return this->operator+=(-1);
}
};
谢谢!
编辑:按照 Greg Rogers 的建议将 size_t 更改为 uint32_t,这样它更便携
编辑:为整个事物添加了列表,并进行了一些修复。
更多编辑:在我的机器上使用锁定浮点数进行 5.000.000 += 操作(在我的机器上使用 100 个线程)需要 3.6 秒,而我的原子浮点数即使是愚蠢的 do-while 也需要 0.2 秒才能完成相同的工作。因此,> 30 倍的性能提升意味着它是值得的,(这就是问题所在)如果它是正确的。
更多编辑:正如 Awgn 指出的那样,我的fetch_and_xxxx
部分都错了。修复了这个问题并删除了我不确定的部分 API(模板化内存模型)。并在运算符 += 方面实现了其他操作,以避免代码重复
添加:添加了运算符 *= 和运算符 /=,因为没有它们,浮点数就不会是浮点数。感谢 Peterchen 的评论,这被注意到了
编辑:最新版本的代码如下(我将保留旧版本以供参考)
#include <tbb/atomic.h>
typedef unsigned int uint_32;
typedef __TBB_LONG_LONG uint_64;
template<typename FLOATING_POINT,typename MEMORY_BLOCK>
struct atomic_float_
{
/* CRC Card -----------------------------------------------------
| Class: atmomic float template class
|
| Responsability: handle integral atomic memory as it were a float,
| but partially bypassing FPU, SSE/MMX, so it is
| slower than a true float, but faster and smaller
| than a locked float.
| *Warning* If your float usage is thwarted by
| the A-B-A problem this class isn't for you
| *Warning* Atomic specification says we return,
| values not l-values. So (i = j) = k doesn't work.
|
| Collaborators: intel's tbb::atomic handles memory atomicity
----------------------------------------------------------------*/
typedef typename atomic_float_<FLOATING_POINT,MEMORY_BLOCK> self_t;
tbb::atomic<MEMORY_BLOCK> atomic_value_;
template<memory_semantics M>
FLOATING_POINT fetch_and_store( FLOATING_POINT value )
{
const MEMORY_BLOCK value_ =
atomic_value_.tbb::atomic<MEMORY_BLOCK>::fetch_and_store<M>((MEMORY_BLOCK&)value);
//atomic specification requires returning old value, not new one
return reinterpret_cast<const FLOATING_POINT&>(value_);
}
FLOATING_POINT fetch_and_store( FLOATING_POINT value )
{
const MEMORY_BLOCK value_ =
atomic_value_.tbb::atomic<MEMORY_BLOCK>::fetch_and_store((MEMORY_BLOCK&)value);
//atomic specification requires returning old value, not new one
return reinterpret_cast<const FLOATING_POINT&>(value_);
}
template<memory_semantics M>
FLOATING_POINT compare_and_swap( FLOATING_POINT value, FLOATING_POINT comparand )
{
const MEMORY_BLOCK value_ =
atomic_value_.tbb::atomic<MEMORY_BLOCK>::compare_and_swap<M>((MEMORY_BLOCK&)value,(MEMORY_BLOCK&)compare);
//atomic specification requires returning old value, not new one
return reinterpret_cast<const FLOATING_POINT&>(value_);
}
FLOATING_POINT compare_and_swap(FLOATING_POINT value, FLOATING_POINT compare)
{
const MEMORY_BLOCK value_ =
atomic_value_.tbb::atomic<MEMORY_BLOCK>::compare_and_swap((MEMORY_BLOCK&)value,(MEMORY_BLOCK&)compare);
//atomic specification requires returning old value, not new one
return reinterpret_cast<const FLOATING_POINT&>(value_);
}
operator FLOATING_POINT() const volatile // volatile qualifier here for backwards compatibility
{
const MEMORY_BLOCK value_ = atomic_value_;
return reinterpret_cast<const FLOATING_POINT&>(value_);
}
//Note: atomic specification says we return the a copy of the base value not an l-value
FLOATING_POINT operator=(FLOATING_POINT rhs)
{
const MEMORY_BLOCK value_ = atomic_value_.tbb::atomic<MEMORY_BLOCK>::operator =((MEMORY_BLOCK&)rhs);
return reinterpret_cast<const FLOATING_POINT&>(value_);
}
//Note: atomic specification says we return an l-value when operating among atomics
self_t& operator=(self_t& rhs)
{
const MEMORY_BLOCK value_ = atomic_value_.tbb::atomic<MEMORY_BLOCK>::operator =((MEMORY_BLOCK&)rhs);
return *this;
}
FLOATING_POINT& _internal_reference() const
{
return reinterpret_cast<FLOATING_POINT&>(atomic_value_.tbb::atomic<MEMORY_BLOCK>::_internal_reference());
}
FLOATING_POINT operator+=(FLOATING_POINT value)
{
FLOATING_POINT old_value_, new_value_;
do
{
old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_);
new_value_ = old_value_ + value;
//floating point binary representation is not an issue because
//we are using our self's compare and swap, thus comparing floats and floats
} while(self_t::compare_and_swap(new_value_,old_value_) != old_value_);
return (new_value_); //return resulting value
}
FLOATING_POINT operator*=(FLOATING_POINT value)
{
FLOATING_POINT old_value_, new_value_;
do
{
old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_);
new_value_ = old_value_ * value;
//floating point binary representation is not an issue becaus
//we are using our self's compare and swap, thus comparing floats and floats
} while(self_t::compare_and_swap(new_value_,old_value_) != old_value_);
return (new_value_); //return resulting value
}
FLOATING_POINT operator/=(FLOATING_POINT value)
{
FLOATING_POINT old_value_, new_value_;
do
{
old_value_ = reinterpret_cast<FLOATING_POINT&>(atomic_value_);
new_value_ = old_value_ / value;
//floating point binary representation is not an issue because
//we are using our self's compare and swap, thus comparing floats and floats
} while(self_t::compare_and_swap(new_value_,old_value_) != old_value_);
return (new_value_); //return resulting value
}
FLOATING_POINT operator-=(FLOATING_POINT value)
{
return this->operator+=(-value); //return resulting value
}
//Prefix operator
FLOATING_POINT operator++()
{
return this->operator+=(1); //return resulting value
}
//Prefix operator
FLOATING_POINT operator--()
{
return this->operator+=(-1); //return resulting value
}
//Postfix operator
FLOATING_POINT operator++(int)
{
const FLOATING_POINT temp = this;
this->operator+=(1);
return temp//return resulting value
}
//Postfix operator
FLOATING_POINT operator--(int)
{
const FLOATING_POINT temp = this;
this->operator+=(1);
return temp//return resulting value
}
FLOATING_POINT fetch_and_add( FLOATING_POINT addend )
{
const FLOATING_POINT old_value_ = atomic_value_;
this->operator+=(addend);
//atomic specification requires returning old value, not new one as in operator x=
return old_value_;
}
FLOATING_POINT fetch_and_increment()
{
const FLOATING_POINT old_value_ = atomic_value_;
this->operator+=(+1);
//atomic specification requires returning old value, not new one as in operator x=
return old_value_;
}
FLOATING_POINT fetch_and_decrement()
{
const FLOATING_POINT old_value_ = atomic_value_;
this->operator+=(-1);
//atomic specification requires returning old value, not new one as in operator x=
return old_value_;
}
};
typedef atomic_float_<float,uint_32> AtomicFloat;
typedef atomic_float_<double,uint_64> AtomicDouble;