c++ - 不同架构上的 C 文件操作

Question

作为一个研究项目，我们正在标准 C (BINARY) 文件处理库 (stdio) 之上编写一个抽象层，通过提供一些额外的函数来处理带有事务的文件。

工作流程如下：

用户使用我们的 API（或标准的 fopen）打开文件。两者都返回 FILE*。文件以 BINARY 模式打开！
用户使用标准库命令（例如fwrite）将数据写入文件
用户使用我们的 API 在打开的文件上打开一个事务：TRANSACTION a = trans_start(FILE* )
用户为 TRANSACTION 对象设置数据验证器（set_validator(TRANSACTION, int(*)(char*))）
用户使用我们自己的 API 将数据“写入”文件（int trans_write_string(TRANSACTION*, char*, length)）
- 实际上，这种“写入”将其数据放入上面定义的验证器的内存中，这些验证器可能会对数据进行操作并在某处设置一些标志……与问题无关。
用户使用trans_commit(TRANSACTION)以实际将数据写入文件。现在，根据验证器设置的标志，这可能不会将数据写入文件，而是向用户报告错误（可以通过编程方式解决。...与问题不太相关）。
用户使用标准 API 关闭文件fclose。

到目前为止，我们只有 API ( trans_write_string) 的字符串处理方法，它工作得很好。它在内存中构建自己的数据缓冲区，根据需要修改内容，调用验证器等...在连续调用时，它将新数据附加到其内部内存缓冲区中，处理分配等...并在成功提交时写入使用fwrite（是的，这主要是一个 C 项目，但是 C++ 答案也不会被排除）。

但是现在我们想要（……必须）扩展 API，使其能够写入数字（16 位、32 位、64 位）以及浮点数……方式与标准 C stdio API 非常相似。沿用已有的字符串实现，这意味着内存中有一个数据缓冲区，其中先保存 N 个 char 字节（字符串本身），接着可能需要 2 个字节存放一个 16 位值，然后是另一个字符串的 M 个字节、一个 64 位值的 8 个字节、一个 16 位值的 2 个字节，等等……

我们陷入了“如何在文件中表示一个数字，以便使用不同计算机/架构/操作系统/字节序的其他人也可以读取”这一点上。

理论上，可以通过转换为 char 指针（char* addr = &my_16bit_int），再把 *(addr) 和 *(addr + 1) 放到所需的地址（即字符串的 N 个字符之后），从而把数字插入内存流；把它写入文件也是可行的。但是，如果我想在字节序不同的另一种架构上读取生成的文件，该怎么办？如果“另一台”计算机只是一台古老的 16 位机器呢？在这种情况下，写入文件中的 64 位值会发生什么情况？

解决此类问题有哪些好的做法？

编辑：目标文件必须是二进制的，它会附带一个描述其格式的文本文件（XML）（例如：N 个 8 字节字符、1 个 16 位值等）（这个文本文件是根据我们心爱的验证器的输出生成的）。验证器会“说”类似这样的话：是的，我接受这个 16 位值；不，我拒绝这个长字符串；等等……然后其他人基于这个“输出”创建描述数据格式的 XML。

EDIT2：是的，我们需要在各种平台上共享文件，甚至包括已有 20 年历史、像大冰箱那么大的机器 :)

EDIT3：是的，我们也需要浮点数！

score 2 · Accepted Answer

仅靠强制类型转换是不够的。我认为套接字库的 htons 和 htonl 分别足以处理 int16 和 int32。对于 int64，你需要自己实现，因为没有官方函数：

请注意，所有函数仅在需要时才反转字节顺序，因此您也可以使用同一个函数把数字“还原”为主机字节序。

/*
 * Overlays one unsigned short with an array of two raw bytes, so the bytes
 * of the value can be addressed by index.
 * Assumes sizeof(unsigned short) == 2; on a platform with a wider short the
 * byte array covers only part of `s`.
 */
typedef union{
    unsigned char c[2];
    unsigned short s;
}U2;

/*
 * Convert the low 16 bits of `s` from host byte order to network
 * (big-endian) byte order. You can use the standard htons or this.
 *
 * The bytes are swapped only when the host is little-endian, so applying
 * the function a second time restores the original value (it doubles as
 * ntohs).
 *
 * Implemented with shifts and a one-byte probe instead of reading an
 * inactive union member, so it is well defined in both C and C++ and does
 * not depend on sizeof(unsigned short) being exactly 2.
 *
 * NOTE: the name collides with the standard htons when <arpa/inet.h> or
 * <winsock2.h> is included; use one or the other, not both.
 */
unsigned short htons(unsigned short s)
{
    const unsigned short probe = 0x0001;
    const unsigned int v = s & 0xFFFFu;

    /* The first byte of 0x0001 is zero only on a big-endian host, where
       host order already equals network order. */
    if (*(const unsigned char *)&probe == 0) {
        return (unsigned short)v;
    }

    return (unsigned short)(((v & 0x00FFu) << 8) | (v >> 8));
}

/*
 * The same idea for 4 bytes: overlays one unsigned long with four raw bytes
 * and with two unsigned shorts.
 * Assumes sizeof(unsigned long) == 4 and sizeof(unsigned short) == 2. Where
 * unsigned long is wider (e.g. 8 bytes on LP64 platforms), `l` extends past
 * the end of `c`, and writing `c[0..3]` leaves the rest of `l` untouched.
 */
typedef union{
    unsigned char c[4];
    unsigned short s[2];
    unsigned long l;
}U4;

/*
 * Convert the low 32 bits of `l` from host byte order to network
 * (big-endian) byte order. You can use the standard htonl or this.
 *
 * The bytes are swapped only when the host is little-endian, so applying
 * the function a second time restores the original value (it doubles as
 * ntohl).
 *
 * Only the low 32 bits take part; the result never has bits set above
 * bit 31. The function therefore behaves the same whether unsigned long is
 * 32 or 64 bits wide. The previous union-based version returned
 * indeterminate upper bytes on platforms with a 64-bit unsigned long.
 *
 * NOTE: the name collides with the standard htonl when <arpa/inet.h> or
 * <winsock2.h> is included; use one or the other, not both.
 */
unsigned long htonl(unsigned long l)
{
    const unsigned short probe = 0x0001;
    const unsigned long v = l & 0xFFFFFFFFUL;

    /* The first byte of 0x0001 is zero only on a big-endian host, where
       host order already equals network order. */
    if (*(const unsigned char *)&probe == 0) {
        return v;
    }

    return ((v & 0x000000FFUL) << 24)
         | ((v & 0x0000FF00UL) << 8)
         | ((v & 0x00FF0000UL) >> 8)
         | ((v & 0xFF000000UL) >> 24);
}

/*
 * Overlays one unsigned long long with several views of the same storage:
 * eight raw bytes, two groups of four bytes, four unsigned shorts and two
 * unsigned longs.
 * Assumes sizeof(unsigned long long) == 8, sizeof(unsigned long) == 4 and
 * sizeof(unsigned short) == 2. Where unsigned long is 8 bytes, `l[2]`
 * occupies 16 bytes and no longer lines up with `ll`.
 */
typedef union{
    unsigned char c[8];
    unsigned char c2[2][4];
    unsigned short s[4];    
    unsigned long l[2];
    unsigned long long ll; 
}U8; 

/*
 * Convert a 64-bit value from host byte order to network (big-endian)
 * byte order. There is no standard function for this width.
 *
 * The bytes are reversed only when the host is little-endian, so applying
 * the function a second time restores the original value.
 *
 * Implemented with shifts on the 64-bit value itself rather than with
 * index masks taken from 2-byte and 4-byte unions, so the result does not
 * depend on the size of unsigned short or unsigned long. A pointer-based
 * variant (void htonll(void *arg, void *result)) can be built the same way
 * by copying the returned value into *result.
 */
unsigned long long htonll(unsigned long long ll)
{
    const unsigned short probe = 0x0001;
    unsigned long long swapped = 0;
    int i;

    /* The first byte of 0x0001 is zero only on a big-endian host, where
       host order already equals network order. */
    if (*(const unsigned char *)&probe == 0) {
        return ll;
    }

    /* Peel bytes off the low end and push them onto the result, which
       reverses the order of all eight bytes. */
    for (i = 0; i < 8; i++) {
        swapped = (swapped << 8) | (ll & 0xFFu);
        ll >>= 8;
    }
    return swapped;
}
/*
 * Alternative 64-bit host-to-network conversion built on htonl: each
 * 32-bit half is converted with htonl, and the halves trade places when
 * the host is little-endian.
 *
 * The halves are extracted with shifts instead of reading `ll` through an
 * unsigned long pointer. That pointer read violated aliasing rules and
 * read past the end of `ll` on platforms where unsigned long is 8 bytes.
 * Only the low 32 bits of each htonl result are used, so this works with
 * either a 32-bit or a 64-bit unsigned long.
 */
unsigned long long htonll2(unsigned long long ll)
{
    const unsigned long hi = (unsigned long)((ll >> 32) & 0xFFFFFFFFUL);
    const unsigned long lo = (unsigned long)(ll & 0xFFFFFFFFUL);

    /* htonl leaves 1 unchanged only when host order is already network
       order; in that case there is nothing to do. */
    if ((htonl(1UL) & 0xFFFFFFFFUL) == 1UL) {
        return ll;
    }

    /* Little-endian host: the converted low half becomes the high half of
       the result, and vice versa. */
    return ((unsigned long long)(htonl(lo) & 0xFFFFFFFFUL) << 32)
         | (unsigned long long)(htonl(hi) & 0xFFFFFFFFUL);
}

// Demo driver: runs each conversion helper on a sample value and prints the
// result in hexadecimal, one value per line.
int main()
{
    const unsigned short value16 = 0x1122;
    const unsigned long value32 = 0x11223344;
    const unsigned long long value64 = 0x1122334455667788;

    // The hex manipulator is sticky, so setting it once covers every line.
    cout << hex;

    cout << htons(value16) << endl;
    cout << htonl(value32) << endl;
    cout << htonll(value64) << endl;
    cout << htonll2(value64) << endl;

    return 0;
}

score 1 · Accepted Answer

您必须定义一种格式，或选择现有的二进制格式（如 XDR）并对其进行读写。因此，例如，在 XDR 中写入一个 32 位整数：

/*
 * Write the low 32 bits of `value` to `dest` as four bytes, most
 * significant byte first (the byte order XDR uses), regardless of the
 * host's own byte order.
 *
 * dest  - output stream; should be opened in binary mode.
 * value - value to write; only bits 0..31 are emitted.
 *
 * The return values of putc are not checked, so write errors must be
 * detected by the caller (e.g. with ferror).
 */
void
write32Bits( FILE* dest, uint_least32_t value )
{
    int shift;

    /* Walk from the top byte (bits 24..31) down to the bottom byte. */
    for ( shift = 24; shift >= 0; shift -= 8 ) {
        putc( (value >> shift) & 0xFF, dest );
    }
}

浮点更复杂，但如果您愿意将平台限制为支持 IEEE 浮点的平台，可以通过类型双关（type punning）把 float 转为 uint32_t、把 double 转为 uint64_t，并将其作为无符号整数输出。同样，如果您将自己限制为具有 32 位整数类型的 2 的补码机器，也可以使用上面的移位和掩码方法来处理有符号值（此时整数类型为 uint32_t 和 int32_t）。

关于可移植性：我认为 IEEE 是通用的，除了大型机，2 的补码是通用的，除了非常奇特的大型机。（IBM 大型机是 2 的补码，但不是 IEEE。Unisys 大型机不是 2 的补码，也没有 32 位整数类型。我不确定其他大型机是否还存在，但过去都有各种异国情调。）

score 1 · Accepted Answer

如果您使用的是 glibc，那么可以使用 “endian.h” 中的函数进行小端与大端（le <-> be）之间的转换：

SYNOPSIS
   #define _BSD_SOURCE             /* See feature_test_macros(7) */
   #include <endian.h>

   uint16_t htobe16(uint16_t host_16bits);
   uint16_t htole16(uint16_t host_16bits);
   uint16_t be16toh(uint16_t big_endian_16bits);
   uint16_t le16toh(uint16_t little_endian_16bits);

   uint32_t htobe32(uint32_t host_32bits);
   uint32_t htole32(uint32_t host_32bits);
   uint32_t be32toh(uint32_t big_endian_32bits);
   uint32_t le32toh(uint32_t little_endian_32bits);

   uint64_t htobe64(uint64_t host_64bits);
   uint64_t htole64(uint64_t host_64bits);
   uint64_t be64toh(uint64_t big_endian_64bits);
   uint64_t le64toh(uint64_t little_endian_64bits);

如果你不使用 glibc，那么你可以看看 glibc-2.18/bits/byteswap.h

c++ - 不同架构上的 C 文件操作

3 回答 3

Related

Reference