0

我在一个文件中有数百万条记录,需要进行一些计算。为此,我有 java 程序和 c++ 程序的相同副本,但 Java 的执行速度比 c++ 快。我切换到 C++ 的主要原因是执行多线程以使程序运行得更快。但是当我比较 java 和 c++ 之间的 1 个线程工作时,java 在一半的时间内完成了这项工作。

我需要解决这个问题。按理说 C++ 应该更快,但实际性能却很差。

如果能给一些有用的提示就太好了,这样我可以去研究并尝试修复它。

谢谢

这是从逗号分隔的数据中生成对象的类

//Parser.cpp 
#include "Parser.h"
#include "PriceBar.h"
#include <iostream>
#include <string>
#include <fstream>
#include <sstream>
#include <stdlib.h>


using namespace std;

/**
 * Parses a comma-separated price file into PriceBar objects.
 *
 * Each line is split on ','. Columns 1 through 4 (0-based) are converted with
 * atof and passed to PriceBar(open, high, low, close); column 0 is not used.
 * Lines with fewer than five columns, or whose column 1 is empty, are skipped.
 *
 * @param file Path of the file to read.
 * @return One PriceBar per accepted line, in file order. The result is empty
 *         when the file cannot be opened, because the first getline then fails.
 */
vector<PriceBar> Parser :: parseFile(string file){
    vector<PriceBar> bars;
    ifstream infile(file.c_str());

    // These objects are reused for every line so that their buffers are not
    // reallocated (and the stream not reconstructed) once per row.
    string line;
    string token;
    vector<string> data;
    istringstream ss;

    while (getline(infile, line)) {
        data.clear();
        ss.clear();    // drop the eof/fail bits left over from the previous line
        ss.str(line);
        while (getline(ss, token, ',')) {
            data.push_back(token);
        }

        // Blank or truncated lines would otherwise be indexed out of bounds
        // below, which is undefined behaviour.
        if (data.size() < 5 || data[1].empty()) {
            continue;
        }

        bars.push_back(PriceBar(atof(data[1].c_str()),
                                atof(data[2].c_str()),
                                atof(data[3].c_str()),
                                atof(data[4].c_str())));
    }

    // infile is closed by its destructor.
    return bars;
}

PriceBar 类

/*
 * PriceBar.cpp
 *
 *  Created on: Nov 5, 2013
 *      Author: hansaka
 */

#include <iostream>
#include <string>
#include <vector>
#include "PriceBar.h"

using namespace std;

/**
 * Constructs a price bar from its four price values.
 *
 * @param open  Value stored in the open field.
 * @param high  Value stored in the high field.
 * @param low   Value stored in the low field.
 * @param close Value stored in the close field.
 */
PriceBar::PriceBar(double open, double high, double low, double close)
    // In each initializer the name outside the parentheses is the member and
    // the name inside is the constructor parameter.
    : open(open), high(high), low(low), close(close) {
}

double PriceBar :: getOpen() {
    return open;
}
/**
 * Replaces the stored open value.
 *
 * @param open New value; the parameter shadows the member, hence `this->`.
 */
void PriceBar::setOpen(double open) {
    this->open = open;
}
double PriceBar :: getHigh() {
    return high;
}
/**
 * Replaces the stored high value.
 *
 * @param high New value; the parameter shadows the member, hence `this->`.
 */
void PriceBar::setHigh(double high) {
    this->high = high;
}
double PriceBar :: getLow() {
    return low;
}
/**
 * Replaces the stored low value.
 *
 * @param low New value; the parameter shadows the member, hence `this->`.
 */
void PriceBar::setLow(double low) {
    this->low = low;
}
double PriceBar :: getClose() {
    return close;
}
/**
 * Replaces the stored close value.
 *
 * @param close New value; the parameter shadows the member, hence `this->`.
 */
void PriceBar::setClose(double close) {
    this->close = close;
}

主文件

#include <iostream>
#include <vector>
#include <string>
#include "PriceBar.h"
#include "Parser.h"
#include <ctime>

using namespace std;

int main() {
Parser p;

//getting the counter ready
time_t tstart, tend;

//Starting the time
tstart = time(0);

vector<string> path;
path.push_back("file.csv");

for( vector<string>::const_iterator it = path.begin(); it != path.end(); ++it ){
  //    cout << *it << endl;
    vector<PriceBar> priceBars = p.parseFile(*it);
    //priceBars = p.parseFile(*it);

//      cout << "done" << endl;

    double maxHigh = 0.0;
    double maxLow = 0.0;
    double maxOpen = 0.0;
    double maxClose = 0.0;
    double maxVolume = 0.0;
    double current = 0.0;

  //     cout << "hippy " << priceBars[2].getOpen() <<endl;
   int size = priceBars.size();
 //      cout << "size = " << size << endl;


    for (int j=0;j<size;j++) {
        current = priceBars[j].getOpen();
        if (current > maxOpen) {
            maxOpen = current;
        }
    }//end of pricebar for

    current = 0.0;
    for (int j=0;j<size;j++) {
        current = priceBars[j].getOpen();
        if (current > maxHigh) {
            maxHigh = current;
        }
    }
    current = 0.0;
    for (int j=0;j<size;j++) {
        current = priceBars[j].getOpen();
        if (current > maxLow) {
            maxLow = current;
        }
    }
    current = 0.0;
    for (int j=0;j<size;j++) {
        current = priceBars[j].getOpen();
        if (current > maxClose) {
            maxClose = current;
        }
    }

                cout << "MaxHigh =" << maxOpen << " MaxLow = " << maxHigh
                        << " MaxHigh =" << maxLow << " MaxLow = " << maxClose << endl;


}//end of it for
cout << "DONE" << endl;

//Ending the time count
tend = time(0);

cout << " It took " << difftime(tend, tstart) << " second(s).";

return 0;
}

我一直在修改这段代码,所以没有写太多注释;我只是把部分代码注释掉留作参考,对此我深表歉意。

4

1 回答 1

2

我会做几件事:

  1. 把读取文件时用到的对象的构造移到循环之外,在循环内改为调用 clear() 清空它们。构造流的开销并不小,而每次都为 vector 和 string 重新分配内存而不是重用,也很低效。
  2. 我不会去存储 std::vector<std::string>,因为存储 std::vector<double> 就足够了,可以直接转换各个值。
  3. 当前代码没有检查每一行是否确实读到了足够的字段,对于格式错误的文件,这可能导致越界访问。如果文件包含超过 5 列,则可以不解码多余的尾随列。
  4. 在 main() 中我会调用 std::ios_base::sync_with_stdio(false);。不过,我预计它不会带来太大差别(它确实能提高标准流对象的性能,但我可以想象,在实现得很差的 IOStream 库中,它也会影响文件流)。
  5. 由于 IOStream 通常在标头中实现,因此启用优化非常重要,至少对于具有 I/O 的翻译单元而言。

这是我编写函数的方式:

/**
 * Parses a comma-separated price file into PriceBar objects.
 *
 * The first field of each line is read and discarded. The next four fields
 * are converted with std::atof (an empty field becomes 0.0) and passed to the
 * PriceBar constructor in that order. Lines that yield fewer than four value
 * fields are skipped; fields after the fourth value are not decoded.
 *
 * @param file Path of the file to read.
 * @return One PriceBar per accepted line, in file order. The result is empty
 *         when the file cannot be opened, because the first getline then fails.
 */
std::vector<PriceBar> Parser::parseFile(std::string const& file) {
    std::vector<PriceBar> bars;
    std::ifstream         infile(file.c_str());
    // The stream and the column buffer are built once and reset per line
    // instead of being reconstructed for every row.
    std::istringstream    lin;
    std::vector<double>   columns;

    for (std::string line, topic, value; std::getline(infile, line); ) {
        lin.clear();    // drop the eof/fail bits left over from the previous line
        lin.str(line);
        columns.clear();

        // Skip the leading field; only the four fields after it are used.
        std::getline(lin, topic, ',');
        while (columns.size() < 4 && std::getline(lin, value, ',')) {
            columns.push_back(value.empty()? 0.0: std::atof(value.c_str()));
        }
        if (columns.size() == 4) {
            bars.push_back(PriceBar(columns[0], columns[1], columns[2], columns[3]));
        }
    }
    return bars;
}

我不认为使用多线程处理会有多大帮助。读取一个只有一百万行左右的小文件,不值得引入相应的复杂性。

于 2013-11-07T07:17:57.313 回答