0

我有一个 mingw 错误:我无法读取大的二进制文件(3.2 gb)。我的测试代码:

#include <string>
#include <vector>
#include <iostream>
#include <filesystem>
#include <fstream>
#include <cassert>

namespace fs = std::filesystem;

template<typename TYPE>
inline void read_binary_file(const fs::path filename, std::vector<TYPE>& result)
{

  std::ifstream file(filename, std::ios::in | std::ios::binary |std::ios::ate);

  file.seekg(0, std::ios::end);
  size_t filesize = file.tellg();
  file.seekg(0, std::ios::beg);

  //size_t filesize = fs::file_size(filename); !!!!!<------------ BUG FOR mingw32 WITH gcc 10.2. Should be ok with gcc 10.3

  assert(filesize%sizeof(TYPE) == 0);
  result.resize(filesize/sizeof(TYPE));

  std::cout << "result size : " << result.size();

  file.read(reinterpret_cast<char *>(result.data()), filesize);

  file.close();
}


int main()
{
  {
  fs::path path = "idx-position-Deces_Agit_FrHex_aPartir1979_Dom_aPartir2000_enCours-liens_age_tranche_age#age_quinquenal_0_100.dat";
  std::vector<uint32_t> result;

  read_binary_file<uint32_t>(path, result);

  for(int i=0; i<10; ++i)
    std::cout << result[i] << std::endl;

  std::cout << std::endl;

  std::cout << result[result.size()-3] << std::endl;
  std::cout << result[result.size()-2] << std::endl;
  std::cout << result[result.size()-1] << " " << uint32_t(-1) << std::endl;
  }


  std::cout << std::endl;
  std::cout << std::endl;
  std::cout << std::endl;

  {

    fs::path path = "idx-position-ALD_prevalence_MSA_2017_Communes-liens_maladie_ald#CIM10_ALD0516.dat";
    std::vector<uint32_t> result;

    read_binary_file<uint32_t>(path, result);

    for(int i=0; i<10; ++i)
      std::cout << result[i] << std::endl;

    std::cout << std::endl;

    std::cout << result[result.size()-3] << std::endl;
    std::cout << result[result.size()-2] << std::endl;
    std::cout << result[result.size()-1] << " " << uint32_t(-1) << std::endl;
  }

  return 0;
}

我已经在两个不同的系统(Linux 上的 gcc 和 Windows 上的 mingw)上的 2 个文件(~3 mo 和 ~3 go)上尝试了此代码。在linux上,我有这个输出:

result size : 8006943820
1
2
4
6
8
10
12
15
18

1174722779
1174722780
4294967295 4294967295



result size : 8554610
1
2
3
4
5
6
7
8
9

869037
869038
4294967295 4294967295

但是在 Windows 上,对于 3 go 文件,函数 std::ifstream::read 似乎什么都不做。我的输出:

filesize : 3202777528 
sizeof : 4 
filesize/sizeof : 800694382 
result size : 800694382
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 
0 4294967295 


filesize : 3421844 
sizeof : 4 
filesize/sizeof : 855461 
result size : 8554610 
1 
2 
3 
4 
5 
6 
7 
8 
9 
869037 
869038 
4294967295 4294967295

这些文件在 linux 和 windows 上是相同的。我的 mingw 版本是:

GNU Make 4.3 Built for Windows32 Copyright (C) 1988-2020 Free Software Foundation, Inc. License GPLv3+: GNU GPL version 3 or later This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law.

gcc 在 10.2 版中。

4

0 回答 0