2

我曾经根据我的需要使用动态编程O(m * n)、后缀树O(m + n)、后缀数组O(nlog^2 n)来计算最长公共子串。最近我学习了Suffix Automaton,它在O(n)中的表现非常令人印象深刻。

我可以编写可以轻松计算最长公共子串长度的代码。例如:

Input:
abcdef
xyzabc

Output:
3

这是代码:

#include <bits/stdc++.h>
using namespace std;

const int maxN = 250500;
const int maxState = maxN << 1;

struct State {
    State *go[26], *suffix;
    int depth, id;
    long long cnt;
};
State pool[maxState], *point, *root, *sink;
int size;

State *newState(int dep) {
    point->id = size++;
    point->depth = dep;
    return point++;
}

void init() {
    point = pool;
    size = 0;
    root = sink = newState(0);
}

void insert(int a) {
    State *p = newState(sink->depth+1);
    State *cur = sink, *sufState;
    while (cur && !cur->go[a]) {
        cur->go[a] = p;
        cur = cur->suffix;
    }
    if (!cur)
        sufState = root;
    else {
        State *q = cur->go[a];
        if (q->depth == cur->depth + 1)
            sufState = q;
        else {
            State *r = newState(cur->depth+1);
            memcpy(r->go, q->go, sizeof(q->go));
            r->suffix = q->suffix;
            q->suffix = r;
            sufState = r;
            while (cur && cur->go[a] == q) {
                cur->go[a] = r;
                cur = cur->suffix;
            }
        }
    }
    p->suffix = sufState;
    sink = p;
}

int work(char buf[]) {
    //printf("%s", buf);
    int len = strlen(buf);
    int tmp = 0, ans = 0;
    State *cur = root;
    for (int i = 0; i < len; i++) {
        if (cur->go[buf[i]-'a']) {
            tmp++;
            cur = cur->go[buf[i]-'a'];
        } else {
            while (cur && !cur->go[buf[i]-'a'])
                cur = cur->suffix;
            if (!cur) {
                cur = root;
                tmp = 0;
            } else {
                tmp = cur->depth + 1;
                cur = cur->go[buf[i]-'a'];
            }
        }
        ans = max(ans, tmp);

    }
    return ans;
}

char ch[maxN];

int main() {
    scanf("%s", ch);
    init();
    int len = strlen(ch);
    for (int i = 0; i < len; i++)
        insert(ch[i]-'a');
    scanf("%s", ch);
    printf("%d\n", work(ch));
    return 0;
}

但现在我需要打印最长的公共子字符串本身,而不是长度。但是我无法修改我的代码:(如何修改此代码以打印最长的公共子字符串?

4

1 回答 1

3

当你在这一行时:

ans = max(ans, tmp);

buf达到的深度的起始位置tmpi - tmp + 1。现在您知道了第二个字符串中所有最长公共子字符串的位置。只需选择任何一个并输出结果。

于 2014-03-16T21:27:00.823 回答