我有这个学校任务,要求我们编写代码来找到最长的公共子字符串。我已经这样做了,但它只适用于不太大的文本,并且被要求找到 Moby Dick 和 War And Peace 的公共子字符串。如果你能指出我做错了什么的正确方向,我将不胜感激。当我调用它来创建 SuffixArray 时,编译器抱怨错误出现在 MyString 类的子字符串方法中,但我不知道为什么它说它太大,给了我内存不足
package datastructuresone;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.Scanner;
class SuffixArray
{
private final MyString[] suffixes;
private final int N;
public SuffixArray(String s)
{
N = s.length();
MyString snew = new MyString(s);
suffixes = new MyString[N];
for (int i = 0; i < N; i++)
{
suffixes[i] = snew.substring(i);
}
Arrays.sort(suffixes);
}
public int length()
{
return N;
}
public int index(int i)
{
return N - suffixes[i].length();
}
public MyString select(int i)
{
return suffixes[i];
}
// length of longest common prefix of s and t
private static int lcp(MyString s, MyString t)
{
int N = Math.min(s.length(), t.length());
for (int i = 0; i < N; i++)
{
if (s.charAt(i) != t.charAt(i))
{
return i;
}
}
return N;
}
// longest common prefix of suffixes(i) and suffixes(i-1)
public int lcp(int i)
{
return lcp(suffixes[i], suffixes[i - 1]);
}
// longest common prefix of suffixes(i) and suffixes(j)
public int lcp(int i, int j)
{
return lcp(suffixes[i], suffixes[j]);
}
}
public class DataStructuresOne
{
public static void main(String[] args) throws FileNotFoundException
{
Scanner in1 = new Scanner(new File("./build/classes/WarAndPeace.txt"));
Scanner in2 = new Scanner(new File("./build/classes/MobyDick.txt"));
StringBuilder sb = new StringBuilder();
StringBuilder sb1 = new StringBuilder();
while (in1.hasNextLine())
{
sb.append(in1.nextLine());
}
while (in2.hasNextLine())
{
sb1.append(in2.nextLine());
}
String text1 = sb.toString().replaceAll("\\s+", " ");
String text2 = sb1.toString().replaceAll("\\s+", " ");
int N1 = text1.length();
int N2 = text2.length();
SuffixArray sa = new SuffixArray(text1 + "#" + text2);
int N = sa.length();
String substring = "";
for (int i = 1; i < N; i++)
{
// adjacent suffixes both from second text string
if (sa.select(i).length() <= N2 && sa.select(i - 1).length() <= N2)
{
continue;
}
// adjacent suffixes both from first text string
if (sa.select(i).length() > N2 + 1 && sa.select(i - 1).length() > N2 + 1)
{
continue;
}
// check if adjacent suffixes longer common substring
int length = sa.lcp(i);
if (length > substring.length())
{
substring = sa.select(i).toString().substring(0, length);
System.out.println(substring + " ");
}
}
System.out.println("The length of the substring " + substring.length() + "length on first N " + N1 + " length of Second N " + N2
+ "The length of the array sa: " + N);
System.out.println("'" + substring + "'");
final class MyString implements Comparable<MyString>
{
public MyString(String str)
{
offset = 0;
len = str.length();
arr = str.toCharArray();
}
public int length()
{
return len;
}
public char charAt(int idx)
{
return arr[ idx + offset];
}
public int compareTo(MyString other)
{
int myEnd = offset + len;
int yourEnd = other.offset + other.len;
int i = offset, j = other.offset;
for (; i < myEnd && j < yourEnd; i++, j++)
{
if (arr[ i] != arr[ j])
{
return arr[ i] - arr[ j];
}
}
// reached end. Who got there first?
if (i == myEnd && j == yourEnd)
{
return 0; // identical strings
}
if (i == myEnd)
{
return -1;
} else
{
return +1;
}
}
public MyString substring(int beginIndex, int endIndex)
{
return new MyString(arr, beginIndex + offset, endIndex - beginIndex);
}
public MyString substring(int beginIndex)
{
return substring(beginIndex, offset + len);
}
public boolean equals(Object other)
{
return (other instanceof MyString) && compareTo((MyString) other) == 0;
}
public String toString()
{
return new String(arr, offset, len);
}
private MyString(char[] a, int of, int ln)
{
arr = a;
offset = of;
len = ln;
}
private char[] arr;
private int offset;
private int len;
}