c++ - 如果可以接受一些精度损失，如何快速紧凑地保存双矩阵？

Question

我有一个matrix of double尺寸为1024x1024. 我想把它写到一个文件中。这是解决方案。正如预期的那样，接受的答案比问题中的第二个答案的时间效率高 50-60%（根据我的简单测试，即以两种方法写入文件）。

还有另一种解决方案是写入 csv 文件（问题中接受的答案），它要慢得多（慢 3-4 倍）

当我写入文件时，矩阵中每个元素的浮点数为 16，输出如下：

-1.6819883882999420e-001 -3.5269531607627869e-001 2.4137189984321594e-001 -3.9325976371765137e-001 -2.2069962322711945e-001 -5.9525445103645325e-002

当我将它写入文件时，第一种方式（第一个链接，接受的答案）和第三种方式（第二个链接，接受的答案）的文件大小变为 24 MB 和 37 MB，这两种方式都是不可接受的。

我需要快速设置矩阵的精度，我的输出变成-1.6819e-01 -3.5269e-01. 任何帮助将不胜感激。

我正在做的是读取 1024x1024 的图像，然后对其进行处理，然后将输出 Mat（它是双精度的）写入文件。考虑到我有数千张图像，我的图像都小于 1 MB，每个图像的运行时间不到 1 秒而不保存。

编辑：当我在 Matlab 中保存相同的矩阵时，它变为 6.75 MB

score 1 · Accepted Answer

编辑

现在可以选择以 16 位浮点数存储。
- 你会失去一些精度（取决于数据范围，改变 rnd 分布以查看不同的错误率）。
- 较慢（20-30 毫秒）（如果重要，请参阅下面的链接了解可能更快的技巧）
- 2mb
如果您知道数据的性质（范围、分布表面（可能是可变长度编码）），可能还可以做更多
- 请参见此处（32 位到 16 位浮点转换）。

代码

//using code lifted from http://www.mathworks.com/matlabcentral/fileexchange/23173    
#include "opencv2/opencv.hpp"
#include <string.h>
#include <math.h>

using namespace cv;

#define  INT16_TYPE          short
#define UINT16_TYPE unsigned short
#define  INT32_TYPE          long
#define UINT32_TYPE unsigned long

int doubles2halfp(void *target, void *source, int numel);
int halfp2doubles(void *target, void *source, int numel);

void writemat(char* fpath,Mat& data,bool isf16)
{

    FILE* fp = fopen(fpath,"wb");
    if (!fp)perror("fopen");
    double dbuf[1024];

    for(int i=0;i<1024;++i)
    {
        for(int j=0;j<1024;++j)
            dbuf[j]=data.at<double>(i,j);     
        if(isf16)
        {
            UINT16_TYPE hbuf[1024];
            doubles2halfp(&hbuf,&dbuf,1024);
            fwrite(&hbuf,sizeof(UINT16_TYPE),1024,fp);
        }else
        {
            fwrite(&dbuf,sizeof(double),1024,fp);
        }
    }
    fclose(fp);
}

void readmat(char* fpath,Mat& data,bool isf16)
{
    FILE* fp = fopen(fpath,"rb");
    if (!fp)perror("fopen");

    double dbuf[1024];
    for(int i=0;i<1024;++i)
    {
        if(isf16)
        {
            UINT16_TYPE hbuf[1024];
            fread(&hbuf,sizeof(UINT16_TYPE),1024,fp);
            halfp2doubles(&dbuf,&hbuf,1024);
        }else{

            fread(&dbuf,sizeof(double),1024,fp);
        }
        for(int j=0;j<1024;++j)
        {
            data.at<double>(i,j)=dbuf[j];
        }
    }
    fclose(fp);
}

int main()
{
    RNG rng=  theRNG();
    Mat data = Mat::zeros(Size(1024,1024),CV_64FC1);
    for(int i=0;i<1024;++i)
        for(int j=0;j<1024;++j)
            data.at<double>(i,j)=rng.uniform(-1.,1.);

    writemat("img.bin",data,true); 

    Mat res = Mat::zeros(Size(1024,1024),CV_64FC1);
    readmat("img.bin",res,true);

    double error=0;
    for(int i=0;i<1024;++i)
        for(int j=0;j<1024;++j)
        {
            //printf("%f %f\n",data.at<double>(i,j),res.at<double>(i,j));
            error+=abs(data.at<double>(i,j)-res.at<double>(i,j));
        }
    printf("err=%f avgerr=%f\n",error,error/1024/1024);

    getchar();
    return 0;
}

////////////////////////////////////////
////////////////////////////////////////
////////////////////////////////////////
////////////////////////////////////////


int doubles2halfp(void *target, void *source, int numel)
{
    UINT16_TYPE *hp = (UINT16_TYPE *) target; // Type pun output as an unsigned 16-bit int
    UINT32_TYPE *xp = (UINT32_TYPE *) source; // Type pun input as an unsigned 32-bit int
    UINT16_TYPE    hs, he, hm;
    UINT32_TYPE x, xs, xe, xm;
    int hes;
    static int next;  // Little Endian adjustment
    static int checkieee = 1;  // Flag to check for IEEE754, Endian, and word size
    double one = 1.0; // Used for checking IEEE754 floating point format
    UINT32_TYPE *ip; // Used for checking IEEE754 floating point format

    if( checkieee ) { // 1st call, so check for IEEE754, Endian, and word size
        ip = (UINT32_TYPE *) &one;
        if( *ip ) { // If Big Endian, then no adjustment
            next = 0;
        } else { // If Little Endian, then adjustment will be necessary
            next = 1;
            ip++;
        }
        if( *ip != 0x3FF00000u ) { // Check for exact IEEE 754 bit pattern of 1.0
            return 1;  // Floating point bit pattern is not IEEE 754
        }
        if( sizeof(INT16_TYPE) != 2 || sizeof(INT32_TYPE) != 4 ) {
            return 1;  // short is not 16-bits, or long is not 32-bits.
        }
        checkieee = 0; // Everything checks out OK
    }

    xp += next;  // Little Endian adjustment if necessary

    if( source == NULL || target == NULL ) { // Nothing to convert (e.g., imag part of pure real)
        return 0;
    }

    while( numel-- ) {
        x = *xp++; xp++; // The extra xp++ is to skip over the remaining 32 bits of the mantissa
        if( (x & 0x7FFFFFFFu) == 0 ) {  // Signed zero
            *hp++ = (UINT16_TYPE) (x >> 16);  // Return the signed zero
        } else { // Not zero
            xs = x & 0x80000000u;  // Pick off sign bit
            xe = x & 0x7FF00000u;  // Pick off exponent bits
            xm = x & 0x000FFFFFu;  // Pick off mantissa bits
            if( xe == 0 ) {  // Denormal will underflow, return a signed zero
                *hp++ = (UINT16_TYPE) (xs >> 16);
            } else if( xe == 0x7FF00000u ) {  // Inf or NaN (all the exponent bits are set)
                if( xm == 0 ) { // If mantissa is zero ...
                    *hp++ = (UINT16_TYPE) ((xs >> 16) | 0x7C00u); // Signed Inf
                } else {
                    *hp++ = (UINT16_TYPE) 0xFE00u; // NaN, only 1st mantissa bit set
                }
            } else { // Normalized number
                hs = (UINT16_TYPE) (xs >> 16); // Sign bit
                hes = ((int)(xe >> 20)) - 1023 + 15; // Exponent unbias the double, then bias the halfp
                if( hes >= 0x1F ) {  // Overflow
                    *hp++ = (UINT16_TYPE) ((xs >> 16) | 0x7C00u); // Signed Inf
                } else if( hes <= 0 ) {  // Underflow
                    if( (10 - hes) > 21 ) {  // Mantissa shifted all the way off & no rounding possibility
                        hm = (UINT16_TYPE) 0u;  // Set mantissa to zero
                    } else {
                        xm |= 0x00100000u;  // Add the hidden leading bit
                        hm = (UINT16_TYPE) (xm >> (11 - hes)); // Mantissa
                        if( (xm >> (10 - hes)) & 0x00000001u ) // Check for rounding
                            hm += (UINT16_TYPE) 1u; // Round, might overflow into exp bit, but this is OK
                    }
                    *hp++ = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero
                } else {
                    he = (UINT16_TYPE) (hes << 10); // Exponent
                    hm = (UINT16_TYPE) (xm >> 10); // Mantissa
                    if( xm & 0x00000200u ) // Check for rounding
                        *hp++ = (hs | he | hm) + (UINT16_TYPE) 1u; // Round, might overflow to inf, this is OK
                    else
                        *hp++ = (hs | he | hm);  // No rounding
                }
            }
        }
    }
    return 0;
}

int halfp2doubles(void *target, void *source, int numel)
{
    UINT16_TYPE *hp = (UINT16_TYPE *) source; // Type pun input as an unsigned 16-bit int
    UINT32_TYPE *xp = (UINT32_TYPE *) target; // Type pun output as an unsigned 32-bit int
    UINT16_TYPE h, hs, he, hm;
    UINT32_TYPE xs, xe, xm;
    INT32_TYPE xes;
    int e;
    static int next;  // Little Endian adjustment
    static int checkieee = 1;  // Flag to check for IEEE754, Endian, and word size
    double one = 1.0; // Used for checking IEEE754 floating point format
    UINT32_TYPE *ip; // Used for checking IEEE754 floating point format

    if( checkieee ) { // 1st call, so check for IEEE754, Endian, and word size
        ip = (UINT32_TYPE *) &one;
        if( *ip ) { // If Big Endian, then no adjustment
            next = 0;
        } else { // If Little Endian, then adjustment will be necessary
            next = 1;
            ip++;
        }
        if( *ip != 0x3FF00000u ) { // Check for exact IEEE 754 bit pattern of 1.0
            return 1;  // Floating point bit pattern is not IEEE 754
        }
        if( sizeof(INT16_TYPE) != 2 || sizeof(INT32_TYPE) != 4 ) {
            return 1;  // short is not 16-bits, or long is not 32-bits.
        }
        checkieee = 0; // Everything checks out OK
    }

    xp += next;  // Little Endian adjustment if necessary

    if( source == NULL || target == NULL ) // Nothing to convert (e.g., imag part of pure real)
        return 0;

    while( numel-- ) {
        h = *hp++;
        if( (h & 0x7FFFu) == 0 ) {  // Signed zero
            *xp++ = ((UINT32_TYPE) h) << 16;  // Return the signed zero
        } else { // Not zero
            hs = h & 0x8000u;  // Pick off sign bit
            he = h & 0x7C00u;  // Pick off exponent bits
            hm = h & 0x03FFu;  // Pick off mantissa bits
            if( he == 0 ) {  // Denormal will convert to normalized
                e = -1; // The following loop figures out how much extra to adjust the exponent
                do {
                    e++;
                    hm <<= 1;
                } while( (hm & 0x0400u) == 0 ); // Shift until leading bit overflows into exponent bit
                xs = ((UINT32_TYPE) hs) << 16; // Sign bit
                xes = ((INT32_TYPE) (he >> 10)) - 15 + 1023 - e; // Exponent unbias the halfp, then bias the double
                xe = (UINT32_TYPE) (xes << 20); // Exponent
                xm = ((UINT32_TYPE) (hm & 0x03FFu)) << 10; // Mantissa
                *xp++ = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
            } else if( he == 0x7C00u ) {  // Inf or NaN (all the exponent bits are set)
                if( hm == 0 ) { // If mantissa is zero ...
                    *xp++ = (((UINT32_TYPE) hs) << 16) | ((UINT32_TYPE) 0x7FF00000u); // Signed Inf
                } else {
                    *xp++ = (UINT32_TYPE) 0xFFF80000u; // NaN, only the 1st mantissa bit set
                }
            } else {
                xs = ((UINT32_TYPE) hs) << 16; // Sign bit
                xes = ((INT32_TYPE) (he >> 10)) - 15 + 1023; // Exponent unbias the halfp, then bias the double
                xe = (UINT32_TYPE) (xes << 20); // Exponent
                xm = ((UINT32_TYPE) hm) << 10; // Mantissa
                *xp++ = (xs | xe | xm); // Combine sign bit, exponent bits, and mantissa bits
            }
        }
        xp++; // Skip over the remaining 32 bits of the mantissa
    }
    return 0;
}

score 0 · Accepted Answer

考虑为此使用 HDF5。HDF5 是一种标准文件格式（具有 C 和 C++ 实现），可让您存储多种类型的数据，但它特别适用于数值矩阵。如果您使用浮点（32 位）值将数据存储为压缩的 HDF5 数据集，我敢打赌它甚至会比 Matlab 结果小得多。

http://www.hdfgroup.org/HDF5/Tutor/compress.html

c++ - 如果可以接受一些精度损失，如何快速紧凑地保存双矩阵？

2 回答 2

编辑

代码

Related

Reference