3

我正在尝试将大量 x/y 双精度(double)坐标点写入文件。

我想出了以下功能,这是迄今为止最快的解决方案。

有没有其他方法可以加快这个过程?

首先写入字符串流,然后打开文件,可以很好地提高速度。

/**
 * Writes every point as one "x y" text line to the given file.
 *
 * All lines are first formatted into an in-memory stringstream; the finished
 * text is then handed to the file stream in a single insertion.
 *
 * @param points  Points to serialize; each pair is written as
 *                first, a space, second, and a newline.
 * @param file    Path of the output file. When empty, nothing is written.
 * @return false if points is empty, the file cannot be opened, or the write
 *         or close fails; true otherwise (including the empty-path case).
 */
bool printPoints(const std::vector<std::pair<double, double> > &points, const std::string &file)
{
    if (points.empty())
        return false;

    // An empty path is treated as "nothing to do" and still reports success.
    if (file.empty())
        return true;

    typedef std::vector<std::pair<double, double> >::const_iterator PointIter;

    std::stringstream ss;
    for (PointIter i = points.begin(); i != points.end(); ++i)
    {
        ss << i->first << " " << i->second << "\n";
    }

    std::ofstream out(file.c_str());
    if (!out)
        return false;

    out << ss.str();
    out.close();

    // close() sets failbit when flushing or closing fails, so a full disk or
    // similar error is reported to the caller instead of being swallowed.
    return !out.fail();
}
4

5 回答 5

1

您可以通过一次以二进制格式写入所有坐标来提高速度

// Dump the vector's element storage to the file in one binary write.
ofstream out(file.c_str(),std::ios_base::binary);
// data() returns a pointer to the contiguous element array; begin() is an
// iterator object and cannot be reinterpret_cast to a char pointer.
// sizeof(points[0]) covers the whole pair, including any padding it may have.
out.write(reinterpret_cast<const char*>(points.data()),sizeof(points[0])*points.size());

如果 pair 中的两个值在内存中不是连续存储的(vector 的元素本身是连续存储的),这种写法可能不起作用;此时可以先把数据复制到一个单一的 double 向量中(x, y, x, y …),然后再写入磁盘。

于 2013-07-18T12:25:38.233 回答
1

坐标的序列化可以通过将输入划分到多个线程然后连接它们的返回值来并行化。然后将返回值写入文件。这样我们可以加快这个过程。

于 2013-07-18T12:21:12.243 回答
1

我对此进行了测试。先写入 stringstream 几乎没有带来任何收益。使用 FILE * 代替 fstream 确实能带来可观的改进。

这是我的测试代码:

#include <vector>
#include <utility>
#include <fstream>
#include <iostream>
#include <sstream>
#include <cstdio>

using namespace std;

// Serializes the points as "x y" lines: everything is formatted into an
// in-memory stringstream first, and the finished text is then handed to the
// output file in a single insertion.
// Returns false when there are no points or the file cannot be opened; returns
// true otherwise, including when the file name is empty (nothing is written).
bool printPoints(const vector <pair <double,double> > &points, const string &file)
{
    if(points.empty())
        return false;

    // No file name: nothing to write, which still counts as success.
    if(file == "")
        return true;

    typedef vector <pair <double,double> >::size_type index_type;

    stringstream buffer;
    const index_type count = points.size();
    for(index_type idx = 0; idx < count; ++idx)
    {
        const pair <double,double> &pt = points[idx];
        buffer << pt.first << " " << pt.second << "\n";
    }

    ofstream dest(file.c_str());
    if(dest.fail())
    {
        dest.close();
        return false;
    }

    dest << buffer.str();
    dest.close();
    return true;
}

// Streams every point straight into the output file as an "x y" line, with no
// intermediate in-memory buffer.
// Returns false when there are no points or the file cannot be opened; returns
// true otherwise, including when the file name is empty (nothing is written).
bool printPoints2(const vector <pair <double,double> > &points, const string &file)
{
    if(points.empty())
        return false;

    // No file name: nothing to write, which still counts as success.
    if(file == "")
        return true;

    ofstream dest(file.c_str());
    if(dest.fail())
    {
        dest.close();
        return false;
    }

    typedef vector <pair <double,double> >::size_type index_type;

    const index_type count = points.size();
    for(index_type idx = 0; idx < count; ++idx)
    {
        const pair <double,double> &pt = points[idx];
        dest << pt.first << " " << pt.second << "\n";
    }

    dest.close();
    return true;
}


/**
 * Writes every point as one "x y" text line using C stdio (FILE* / fprintf).
 *
 * @param points  Points to serialize; each pair is written with "%f %f"
 *                followed by a newline.
 * @param file    Path of the output file. When empty, nothing is written.
 * @return false if points is empty, the file cannot be opened, or any
 *         fprintf/fclose call reports an error; true otherwise (including
 *         the empty-path case).
 */
bool printPoints3(const std::vector<std::pair<double, double> > &points, const std::string &file)
{
    if (points.empty())
        return false;

    // An empty path is treated as "nothing to do" and still reports success.
    if (file.empty())
        return true;

    FILE *out = std::fopen(file.c_str(), "w");
    if (!out)
        return false;

    typedef std::vector<std::pair<double, double> >::const_iterator PointIter;

    bool ok = true;
    for (PointIter i = points.begin(); i != points.end(); ++i)
    {
        // The trailing "\n" keeps one point per line, matching the stream-based
        // writers; without it consecutive records run together in the file.
        if (std::fprintf(out, "%f %f\n", i->first, i->second) < 0)
        {
            ok = false;
            break;
        }
    }

    // fclose flushes buffered data, so its result is part of the write status.
    if (std::fclose(out) != 0)
        ok = false;

    return ok;
}

// Executes the x86 RDTSC instruction and returns the counter as a single
// 64-bit value: the "=d" output (EDX) supplies the upper 32 bits and the
// "=a" output (EAX) supplies the lower 32 bits.
static __inline__ unsigned long long rdtsc(void)
{
    unsigned low_half, high_half;
    __asm__ __volatile__ ("rdtsc" : "=a"(low_half), "=d"(high_half));

    const unsigned long long upper = static_cast<unsigned long long>(high_half) << 32;
    const unsigned long long lower = static_cast<unsigned long long>(low_half);
    return upper | lower;
}

// Benchmark driver: builds 10,000,000 points (i, 1/i), runs each of the three
// writers once, and prints the number of TSC ticks each call took.
int main()
{
    const int kPointCount = 10000000;

    std::vector<std::pair<double, double> > v;
    // The element count is known up front, so allocate once instead of
    // reallocating repeatedly while filling the vector.
    v.reserve(kPointCount);

    unsigned long long t1, t2;

    for (int i = 1; i <= kPointCount; i++)
    {
        // Let make_pair deduce its types; spelling the template arguments
        // out explicitly is fragile under C++11 and later.
        v.push_back(std::make_pair(static_cast<double>(i), 1.0 / i));
    }

    t1 = rdtsc();
    printPoints(v, "points.txt");
    t2 = rdtsc();
    std::cout << "time = " << t2 - t1 << std::endl;

    t1 = rdtsc();
    printPoints2(v, "points2.txt");
    t2 = rdtsc();
    std::cout << "time = " << t2 - t1 << std::endl;

    t1 = rdtsc();
    printPoints3(v, "points3.txt");
    t2 = rdtsc();
    std::cout << "time = " << t2 - t1 << std::endl;
}

Results:
time = 55363637480
time = 54413392112
time = 33069402767

显然,结果可能会因处理器类型、内存类型、硬盘系统(或网络驱动器存储)等而有所不同。但我过去对此进行了测试,并发现了类似的结果。

于 2013-07-18T12:30:28.533 回答
0

Would you consider a memory-mapped file? Just copy the necessary data (in serialized representation, of course) to the memory area returned by the in-memory file mapping, then close the mapping. From my past experience, this is quite a fast way to transfer large amounts of data from STL structures to a file.

于 2013-07-18T12:50:12.877 回答
0
typedef std::pair<double, double> PDD;

// Streams a point as "first second".
// Declared at global scope on purpose: adding overloads to namespace std for a
// standard type such as std::pair<double, double> is undefined behaviour.
inline
std::ostream&
operator<<(std::ostream& os, const PDD& p)
{
  return os << p.first << ' ' << p.second;
}

// Writes each point on its own line to the named file.
// Returns false if there are no points, the file name is empty, the file
// cannot be opened, or the stream reports a failure after the final flush;
// returns true otherwise.
bool
PrintPoints(const std::vector<PDD>& points, const std::string& file)
{
  if (points.empty() || file.empty())
    return false;

  std::ofstream fout(file.c_str());
  if (!fout)
    return false;

  // An explicit loop is used instead of std::ostream_iterator<PDD>: the
  // iterator's operator<< call is looked up from inside namespace std and
  // would not find the global-scope overload above.
  for (std::vector<PDD>::const_iterator it = points.begin(); it != points.end(); ++it)
    fout << *it << '\n';

  // Flush so that a failed write is visible in the return value.
  fout.flush();
  return !fout.fail();
}

close 会由 ofstream 的析构函数自动调用。

于 2013-07-18T14:19:51.707 回答