我正在尝试实现 Boyer-Moore 字符串搜索算法。搜索算法本身似乎工作正常,直到某一点。它打印出所有匹配项,直到达到 3300 个字符区域左右,然后不再进行搜索。我不确定这是否与文本文件太大而无法放入我的字符串或完全不同的东西有关。当我尝试打印保存文本文件的字符串时,它也会切断前 185122 个字符。作为参考,文本文件是指环王:指环王——它有 1016844 个字符长。这是我的参考代码:
#include <fstream>
#include <iostream>
#include <algorithm>
#include <vector>
#include <chrono>
using namespace std;
# define number_chars 256
typedef std::chrono::steady_clock clocktime;
void boyer_moore(string text, string pattern, int textlength, int patlength) {
clocktime::time_point start = clocktime::now();
vector<int> indexes;
int chars[number_chars];
for (int i = 0; i < number_chars; i++) {
chars[i] = -1;
}
for (int i = 0; i < patlength; i++) {
chars[(int)pattern[i]] = i;
}
int shift = 0;
while (shift <= (textlength - patlength)) {
int j = patlength - 1;
while (j >= 0 && pattern[j] == text[shift + j]) {
j--;
}
if (j < 0) {
indexes.push_back(shift);
if (shift + patlength < textlength) {
shift += patlength - chars[text[shift + patlength]];
}
else {
shift += 1;
}
}
else {
shift += max(1, j - chars[text[shift + j]]);
}
}
clocktime::time_point end = clocktime::now();
auto time_taken = chrono::duration_cast<chrono::milliseconds>(end - start).count();
for (int in : indexes) {
cout << in << endl;
}
}
int main() {
ifstream myFile;
//https://www.kaggle.com/ashishsinhaiitr/lord-of-the-rings-text/version/1#01%20-%20The%20Fellowship%20Of%20The%20Ring.txt
myFile.open("lotr.txt");
if (!myFile) {
cout << "no text file found";
}
string text((istreambuf_iterator<char>(myFile)), (istreambuf_iterator<char>()));
cout << text;
string pattern;
cin >> pattern;
int n = text.size();
int m = pattern.size();
boyer_moore(text, pattern, n, m);
}
我试图对可能的原因进行一些研究,但找不到遇到此特定问题的任何人。将不胜感激任何朝着正确方向的推动。