4

有时,从文档中复制代码时,会连同行号和奇怪的弯引号一起复制下来。我已经写了一个程序来删除每行开头的数字,但很难找到办法去掉那些奇怪的引号 ‘’“”,所以我把完整代码贴在下面。它读入一个文件并输出一个整理后的文件。但是编译器警告说这些引号是多字符常量(multi-character constant),我猜这意味着它们是非标准的 ASCII 字符。程序勉强能用,但并不是一个好的解决方案。感谢任何帮助:

#include <iostream>
#include <fstream>
#include <string>

using namespace std;

// Returns a copy of str in which every char equal to ch1 is replaced by ch2.
// The comparison is done one char (one byte) at a time, so a character that
// occupies several bytes in the input cannot be matched through ch1.
string replaceChar(string str, char ch1, char ch2);

// Main
// Replaces every occurrence of `from` inside `text` with `to`, scanning left
// to right. Text that was just inserted is never re-examined, so the loop
// terminates even if `to` contains `from`. An empty `from` is a no-op.
static void replaceAllOccurrences(string& text, const string& from, const string& to) {
    if (from.empty())
        return;

    string::size_type pos = 0;
    while ((pos = text.find(from, pos)) != string::npos) {
        text.replace(pos, from.length(), to);
        pos += to.length();
    }
}

// Main
// Copies the file named by argv[1] to "file_out.txt", dropping the first three
// characters of every line (the pasted line number) and turning typographic
// quotes into their plain ASCII equivalents.
// Returns 1 if the argument is missing or either file cannot be opened.
int main(int argc, char *argv[]) {

    // argv[1] is only meaningful when an input path was actually supplied.
    if (argc < 2) {
        cerr << "Usage: program <input-file>" << endl;
        return 1;
    }

    // ifstream in
    ifstream stri(argv[1]);
    if (stri.fail()) {
        cerr << "File failed to open for input" << endl;
        return 1;
    }

    // ofstream out
    ofstream stro("file_out.txt");
    if (stro.fail()) {
        cerr << "File failed to open for output" << endl;
        return 1;
    }

    // Curly quotes are multi-byte characters, so they cannot be written as
    // char literals ('‘' is a multi-character constant that truncates to a
    // single byte). They are matched as byte strings instead.
    // NOTE(review): the byte sequences below assume the input file is UTF-8.
    const string leftSingle  = "\xE2\x80\x98";  // U+2018
    const string rightSingle = "\xE2\x80\x99";  // U+2019
    const string leftDouble  = "\xE2\x80\x9C";  // U+201C
    const string rightDouble = "\xE2\x80\x9D";  // U+201D

    // Read - Write
    // Looping on getline itself also processes a last line that has no
    // trailing newline and stops on any read error, not only on EOF.
    string line;
    while (getline(stri, line)) {
        // Remove numbers (erase clamps to the line length for short lines)
        line.erase(0, 3);

        replaceAllOccurrences(line, leftSingle, "'");
        replaceAllOccurrences(line, rightSingle, "'");
        replaceAllOccurrences(line, leftDouble, "\"");
        replaceAllOccurrences(line, rightDouble, "\"");

        // '\n' instead of endl: no need to flush after every line.
        stro << line << '\n';
    }

    // Close files
    stri.close();
    stro.close();

    // Output
    cout << "File Edited Ok!";
}

// Returns a copy of str with each char equal to ch1 swapped for ch2.
// str is taken by value, so the caller's string is left untouched.
string replaceChar(string str, char ch1, char ch2) {
  for (string::iterator cursor = str.begin(); cursor != str.end(); ++cursor) {
    if (*cursor != ch1)
      continue;
    *cursor = ch2;
  }

  return str;
}
4

2 回答 2

3

好吧,这段代码并不漂亮,但它能用。如果有人想改进对这些讨厌的奇怪引号的查找方式,请随意!

#include <iostream>
#include <fstream>
#include <string>

using namespace std;

// Function Declaration
// Replaces the first occurrence of `from` in `str` with `to`, in place.
// Returns true if a replacement was made, false if `from` was not found.
bool replace(string& str, const string& from, const string& to);

// NOTE(review): no definition or call of checkMyLine appears in this listing;
// confirm whether this declaration is still needed.
bool checkMyLine(string line);

// Main
// Copies the file named by argv[1] to "file_out.txt". For every line it
// removes everything up to and including the first space (the pasted line
// number, e.g. "001: ") and converts typographic quotes to ASCII quotes.
// Returns 1 if the argument is missing or either file cannot be opened.
int main(int argc, char *argv[]) {

    // argv[1] is only meaningful when an input path was actually supplied.
    if (argc < 2) {
        cerr << "Usage: program <input-file>" << endl;
        return 1;
    }

    // ifstream in
    ifstream stri(argv[1]);
    if (stri.fail()) {
        cerr << "File failed to open for input" << endl;
        return 1;
    }

    // ofstream out
    ofstream stro("file_out.txt");
    if (stro.fail()) {
        cerr << "File failed to open for output" << endl;
        return 1;
    }

    // Typographic quotes and the ASCII quote that replaces each of them.
    // NOTE(review): the \u escapes are encoded in the compiler's execution
    // character set; this presumably matches the input file's encoding
    // (e.g. UTF-8) - confirm for your toolchain.
    const string curlyQuotes[] = {"\u2018", "\u2019", "\u201C", "\u201D"};
    const string asciiQuotes[] = {"'", "'", "\"", "\""};
    const string::size_type quoteKinds = sizeof(curlyQuotes) / sizeof(curlyQuotes[0]);

    // Read - Write
    string line;  // line to edit
    while (getline(stri, line)) {

        // Remove numbers at start of each line followed by space, eg: "001: ".
        // A line that contains no space at all is emptied completely.
        const string::size_type firstSpace = line.find(' ');
        if (firstSpace == string::npos)
            line.clear();
        else
            line.erase(0, firstSpace + 1);

        // Replace Odd Chars
        // replace() handles one occurrence per call, so repeat until it
        // reports that nothing is left. This terminates because no ASCII
        // replacement contains a curly quote.
        for (string::size_type kind = 0; kind < quoteKinds; ++kind) {
            while (replace(line, curlyQuotes[kind], asciiQuotes[kind])) {
            }
        }

        // Write to file ('\n' instead of endl: no flush after every line)
        stro << line << '\n';
    }

    // Close files
    stri.close();
    stro.close();

    // Output Message
    cout << "File Edited Ok!";
}// End of Main
//
// Substitutes `to` for the first occurrence of `from` inside `str`.
// Returns true when a substitution happened; when `from` does not occur,
// `str` is left unchanged and false is returned.
bool replace(string& str, const string& from, const string& to)
{
    const string::size_type hit = str.find(from);
    const bool found = (hit != string::npos);
    if (found) {
        str.replace(hit, from.length(), to);
    }
    return found;
}
于 2013-06-30T18:01:41.397 回答
0

你写了什么样的脚本来删除前导数字?你可以使用 sed 或 tr 吗?它们正是为解决这类问题而存在的。

sed -e 's/[‘’“”]//g'

无需重新发明轮子

于 2013-06-30T16:56:30.870 回答