我正在做一个项目来比较 2 个字符串搜索算法的时间复杂度。由于某些问题,我丢失了以前的代码,因此不得不重写大部分代码。然而,这一次,我的 KMP 算法似乎运行得慢了很多,而且无论我输入什么,我都无法真正让它比我的 Boyer-Moore 运行得更快。这是我的代码:
#include <fstream>
#include <iostream>
#include <algorithm>
#include <vector>
#include <chrono>
using namespace std;
//set number of characters for lookup table
# define number_chars 256
//type alias for clock
typedef chrono::steady_clock clocktime;
void boyer_moore(string text, string pattern, int textlength, int patlength) {
//start timer
clocktime::time_point start = clocktime::now();
//create vector list of all the locations the pattern was found
vector<int> indexes;
//create array for lookup table
int lookup[number_chars];
//initially fill every index of lookup table with -1
for (int i = 0; i < number_chars; i++) {
lookup[i] = -1;
}
//for every character in pattern, set lookup table at index = integer value for character to be the index of the character within the pattern
for (int i = 0; i < patlength; i++) {
lookup[(int)pattern[i]] = i;
}
//shift variable for keeping track of location in text
int shift = 0;
while (shift <= (textlength - patlength)) {
int j = patlength - 1;
//works through pattern from end to start checking if characters in text match, stops when j<0 or character in text doesn't match character in pattern
while (j >= 0 && pattern[j] == text[shift + j]) {
j--;
}
//if pattern found
if (j < 0) {
//add location pattern was found to location list
indexes.push_back(shift);
//if have not reached end of text
if (shift + patlength < textlength) {
//skip over pattern just found so next character in text lines up with last character of pattern
shift += patlength - lookup[text[shift + patlength]];
}
//if pattern occurs at end of text add 1 to shift -> next while loop will fail
else {
shift += 1;
}
}
//if pattern not found
else {
//checks if current character in text is an ASCII character -> if not it would be negative
//(was having some issues previously with non-ASCII characters setting my array to out of bounds so added this condition)
if (lookup[text[shift + j]] > -1) {
//shifts pattern so unmtaching character in text lines up with last character in pattern
shift += max(1, j - lookup[text[shift + j]]);
}
//if non-ASCII character, then it is not in pattern being searched anyway -> skip to next character
else {
shift += 1;
}
}
}
//end timer
clocktime::time_point end = clocktime::now();
//calculate time taken
auto time_taken = chrono::duration_cast<chrono::milliseconds>(end - start).count();
//display index locations of pattern
cout << "Index locations of '" << pattern << "':" << endl;
for (int in : indexes) {
cout << in << ", ";
}
//display time taken
cout << endl << endl << endl << "Boyer-Moore: time taken: " << time_taken << "ms" << endl << endl << endl;
}
void kmp(string text, string pattern, int textlength, int patlength) {
//start time
clocktime::time_point start = clocktime::now();
//create vector list of all the locations the pattern was found
vector<int> indexes;
//creates prefix table
//had to use vector as compiler was complaining about patlength not being a constant value, even when I set it it const int
vector<int> table(patlength);
//keeps track of length of previous longest prefix suffix
int k = 0;
//filling in prefix table
for (int i = 1; i < patlength; i++) {
while (k > 0 && pattern[k] != pattern[i]) {
//u
k = table[k-1];
}
if (pattern[k] == pattern[i]) {
//update longest suffix
k = k + 1;
}
table[i] = k;
}
int textindex = 0;
int patindex = 0;
while (textindex < textlength) {
if (pattern[patindex] == text[textindex]) {
textindex++;
patindex++;
}
if (patindex == patlength) {
indexes.push_back(textindex-patlength);
patindex = table[patindex - 1];
}
else if (textindex < textlength && pattern[patindex] != text[textindex]){
if (patindex != 0) {
patindex = table[patindex - 1];
}
else {
textindex++;
}
}
}
//end timer
clocktime::time_point end = clocktime::now();
auto time_taken = chrono::duration_cast<chrono::milliseconds>(end - start).count();
cout << "Index locations of '" << pattern << "':" << endl;
for (int in : indexes) {
cout << in << ", ";
}
cout << endl << endl << endl << "KMP: time taken: " << time_taken << "ms" << endl << endl << endl;
}
//load file method borrowed from lab exercise
string load_file() {
std::string directory = "";
string text;
for (int i = 0; i < 6; i++) {
const string & filename = "dna.txt";
//https://www.kaggle.com/ashishsinhaiitr/lord-of-the-rings-text/version/1#01%20-%20The%20Fellowship%20Of%20The%20Ring.txt
ifstream f(directory + filename, std::ios_base::binary);
if (!f.good()) {
directory = "../" + directory;
continue;
}
f.seekg(0, std::ios_base::end);
const size_t length = f.tellg();
vector<char> buf(length);
f.seekg(0);
f.read(buf.data(), length);
text.assign(buf.begin(), buf.end());
return text;
}
}
void main() {
//loads text file
string text = load_file();
bool end = false;
while (end == false) {
//gets pattern from user
cout << "pattern: ";
string pattern;
cin >> pattern;
int n = text.size();
int m = pattern.size();
//call both string search algorithms
boyer_moore(text, pattern, n, m);
kmp(text, pattern, n, m);
//ask user if they want to end the program
cout << "Close program? (1 = yes, 0 = no): ";
bool valid = false;
//checks if user provided valid response -> asks for another if not
while (valid == false) {
int userend;
cin >> userend;
if ((int)userend == 1) {
valid = true;
end = true;
}
else if ((int)userend == 0) {
valid = true;
}
else {
"Please enter a valid number: ";
}
}
}
}
例如,在我之前的结果中,对于单词“the”,Boyer-Moore 大约需要 260 毫秒,而 KMP 大约需要 184 毫秒。现在,Boyer-Moore 当然仍然需要大约 260 毫秒,但是 KMP 现在需要大约 330 毫秒。任何指向正确方向的指针将不胜感激。谢谢你。