// n -> length of the text
// m -> length of the pattern
void rabin_karp_check(int n, int m){
int h_p = hash_value(pattern, m, 0);
int h_t = hash_value(text, m, 0);
int x = 0;
int i = 0,k;
while(i < n - m + 1){
if(x > 0){
h_t = rolling_hash(h_t, m, i);
}
x++;
int j = i;
if(h_t == h_p){
int match_count = 0;
k = 0;
while( match_count < m){
if(pattern[k] == text[j]){
match_count++; k++; j++;
}
else
break;
}
if(match_count == m)
cout<<"found at "<<i<<"\n";
}
i++;
}
}
func 使用 hash_formula 计算模式的哈希值和文本大小为 m 的初始幻灯片
//(256^m-1 * p[0] + 256^m-2 * p[1] + 256^m-3 * p[2] + .... + 256^0 * p[m-1]) % q
int hash_value(string &p, int &m, int i){
int q = 101,k = 0;
long long int l = 0;
for(int j = i;j < i + m ; j++){
l = l + pow(256, m - 1 - k) * p[j];
k++;
}
return l % q;
}
使用先前计算的计算下一个哈希值的函数
int rolling_hash(int h_t, int m, int i){
int x = ( ( (h_t - ((long int)pow(256, m - 1) * text[i-1])) * 256) + text[i + m - 1] ) % 101;
return (x < 0) ? x + 101 : x;
}
输出#1:
enter the text: platform for showcasing
enter the pattern: for
found at 4
found at 9
输出 #2:未检测到
enter the text: karp rabin
enter the pattern: rabin
输出#3:检测到
enter the text: best practices in
enter the pattern: ic
found at 10
输出#4:未检测到
enter the text: would you like to hear me tell a joke
enter the pattern: hear
我无法弄清楚为什么会这样。我认为在某些情况下,通过滚动散列函数从先前存储的散列值计算的散列值不等于该文本大小(m)的实际散列值。我在哪里做错了?
提前致谢