我有一个存储在 1 个字节中的浮点数(即 8 位浮点数)。boost 或 C++11(或 C++14)中是否有现成的库函数,可以将这种浮点数转换为十进制数?
我知道如何手动把这 8 位(符号位、指数、尾数)转换为十进制数,只是希望直接利用库函数,而不是自己重新编写一个。
如果能给出现有函数的参考资料,也会很有帮助。
我有一个存储在 1 个字节中的浮点数(即 8 位浮点数)。boost 或 C++11(或 C++14)中是否有现成的库函数,可以将这种浮点数转换为十进制数?
我知道如何手动把这 8 位(符号位、指数、尾数)转换为十进制数,只是希望直接利用库函数,而不是自己重新编写一个。
如果能给出现有函数的参考资料,也会很有帮助。
标准的做法效率不高,但确实存在:
// Sketch of a stream inserter: rebuilds the numeric value from the three
// fields of `n` (mantissa, exp, sign) and lets the stream's built-in
// floating-point formatting produce the decimal text.
friend std::ostream& operator<<(std::ostream& os, Num n) {
    const int direction = n.sign ? -1 : 1; // sign flag set -> negative value
    os << n.mantissa * pow(2.0f, n.exp) * direction;
    return os;
}
当然,这是借助内置的浮点数序列化代码来取巧。但这似乎正是您所要求的。
为了好玩,我拼凑了一个功能非常有限的定点类型。请注意,构造函数有很大缺陷(它不处理规格化/非规格化数(denormal)和 NaN,对较小尾数的缩放也很差)。但它确实演示了上面的转换,这样我就可以检查它们是否正常工作:
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <type_traits>
/// Toy packed floating-point number stored in the bits of one unsigned integer.
///
/// Layout (bit-fields): 1 sign bit, `expbits` bits of biased exponent and the
/// remaining bits as an integer mantissa. The represented value is
///     (-1)^sign * mantissa * 2^(raw_exp - bias),   bias = 2^(expbits - 1).
/// There is no implicit leading one and no encoding for infinities or NaN.
///
/// Demo quality only: construction truncates the mantissa toward zero and
/// rejects anything whose exponent does not fit instead of degrading gracefully.
///
/// @tparam Underlying unsigned integer type that provides the storage
/// @tparam expbits    number of bits reserved for the biased exponent
template <typename Underlying = std::uint8_t, unsigned expbits = 4>
struct Num {
    /// Zero-initialises all three fields; the resulting value converts to 0.
    constexpr Num() noexcept : sign{}, raw_exp{}, mantissa{} {} // NSDMI on bit-fields needs C++20

    /// Builds the packed representation of an arithmetic value.
    ///
    /// @param d value to encode; integral arguments are widened to a
    ///          floating-point type first
    /// @throws std::range_error if `d` is NaN or infinite, or if its exponent
    ///         lies outside [minexp, maxexp] once the mantissa is scaled to
    ///         `manbits` integer bits
    template <typename F> Num(F d) {
        // Work in a real floating-point type: writing frexp's fraction back
        // into an integral F would truncate it to zero.
        using Real = std::common_type_t<F, double>;
        const Real value = static_cast<Real>(d);

        // Non-finite inputs have no encoding, and converting them to an
        // integer mantissa would be undefined behaviour.
        if (!std::isfinite(value)) throw std::range_error("non-finite value");

        int e = 0;
        const Real fraction = std::frexp(std::abs(value), &e); // 0 or in [0.5, 1)

        // The mantissa is stored as fraction * 2^manbits, so the stored
        // exponent has to compensate by -manbits. Signed arithmetic on purpose.
        effective_exp(e - static_cast<int>(manbits));
        sign = std::signbit(value);
        // Truncates toward zero; the result is < 2^manbits so it fits the field.
        mantissa = static_cast<Underlying>(std::ldexp(fraction, static_cast<int>(manbits)));
    }

    /// Decodes the stored fields back into a double.
    explicit constexpr operator double() const {
        // ldexp scales by a power of two exactly and, unlike multiplying by
        // pow(2, e), cannot turn a zero mantissa into NaN for huge exponents.
        const double magnitude = std::ldexp(static_cast<double>(mantissa), effective_exp());
        return sign ? -magnitude : magnitude;
    }

    /// Decodes the stored fields into a float (computed in double, then narrowed).
    explicit constexpr operator float() const { return static_cast<float>(static_cast<double>(*this)); }

private:
    /// Streams the decoded value using the stream's own double formatting.
    friend std::ostream& operator<<(std::ostream& os, Num n) {
        return os << static_cast<double>(n);
    }

    // storage and dimensioning
    static_assert(not std::numeric_limits<Underlying>::is_signed);
    static constexpr unsigned bits = std::numeric_limits<Underlying>::digits;
    static constexpr unsigned signbits = 1;
    static_assert(expbits >= 1, "at least one exponent bit is required");
    static_assert(expbits < static_cast<unsigned>(std::numeric_limits<int>::digits),
                  "exponent field must be narrower than int");
    static_assert(expbits + signbits < bits, "no bits left for the mantissa");
    static constexpr unsigned manbits = bits - expbits - signbits;
    static constexpr int bias = 1 << (expbits - 1);
    // Largest exponent whose biased form (e + bias) still fits in `expbits`
    // bits: 2*bias - 1 is the all-ones pattern. Allowing e == bias would wrap
    // the field to zero and silently corrupt the value.
    static constexpr int maxexp = bias - 1;
    static constexpr int minexp = 1 - bias;

    /// Unbiased exponent currently stored.
    constexpr auto effective_exp() const { return static_cast<int>(raw_exp) - bias; }

    /// Stores `e` in biased form.
    /// @throws std::range_error when `e` is outside [minexp, maxexp]
    void effective_exp(int e) {
        if (e > maxexp || e < minexp) throw std::range_error("overflow");
        raw_exp = static_cast<Underlying>(e + bias);
    }

    Underlying sign: signbits, raw_exp: expbits, mantissa: manbits;
};
namespace { // demo-only helpers: every operation round-trips through double, so they are slow

/// True when the value decoded from `lhs` is smaller than `rhs`.
template <typename U, unsigned s>
bool operator<(Num<U, s> const& lhs, double rhs) { return static_cast<double>(lhs) < rhs; }

/// Compares two packed numbers by their decoded values.
template <typename U, unsigned s>
bool operator<(Num<U, s> const& lhs, Num<U, s> const& rhs) { return lhs < static_cast<double>(rhs); }

/// Adds `rhs` to the decoded value of `lhs` and re-encodes the sum into `lhs`.
/// Re-encoding goes through Num's converting constructor, so whatever that
/// constructor throws propagates from here.
template <typename U, unsigned s>
auto& operator+=(Num<U, s>& lhs, double rhs) {
    // Decode as double rather than float: float keeps only 24 significand
    // bits and would discard mantissa bits of wider layouts before the add.
    return lhs = static_cast<double>(lhs) + rhs;
}

} // namespace
// Demo driver: prints one value from the default one-byte layout, then walks
// two wider layouts from -10 upward in steps of 1.1 while the value is < 10.
int main() {
    {
        static_assert(sizeof(Num<>) == sizeof(char));
        Num x = 1.8;
        std::cout << "Proof of pudding: " << x << "\n";
    }

    // just more paces
    std::cout << "----- 24 bits, 7 expbits: \n";
    {
        Num<uint32_t, 7> n = -10.0;
        while (n < 10.0) {
            std::cout << n << "\n";
            n += 1.1;
        }
    }

    std::cout << "----- 10 bits, 5 expbits: \n";
    {
        Num<uint16_t, 5> n = -10.0;
        while (n < 10.0) {
            std::cout << n << "\n";
            n += 1.1;
        }
    }
    // don't try with 8bit because the flawed ctor will underflow, oh well
}
输出:
Proof of pudding: 1.75
----- 24 bits, 7 expbits:
-10
-8.9
-7.8
-6.7
-5.6
-4.5
-3.4
-2.3
-1.2
-0.0999978
1
2.1
3.2
4.3
5.4
6.5
7.6
8.7
9.8
----- 10 bits, 5 expbits:
-10
-8.89062
-7.78906
-6.6875
-5.58594
-4.48438
-3.38281
-2.28125
-1.17969
-0.0795898
1.01953
2.11719
3.21484
4.3125
5.40625
6.5
7.59375
8.6875
9.78125