您好,我在优化burrows Wheeler 变换时遇到了一些困难。我正在尝试转换文本文件,但是转换像圣经这样的大文本文件需要的时间太长了。
关于如何进行的任何想法?
public BurrowsWheelerTransformEncoder()
{
}
private String originalSuffix(int index, String string)
{
String temp = (string.substring(index,string.length()) + string.substring(0,index));
//this bit just 'compresses' each transformation of text by producing
//a prefix, so 'abracadabra' just becomes 'abrac'
//this is so minimal amount of memory is used when it is stored in an array
return temp.substring(0,5)+
//the last character of the transformation is kept
temp.charAt(temp.length()-1);
}
private String compressedSuffix(String string)
{
//this method just 'compresses' original piece of text by producing
//a prefix, so 'abracadabra' just becomes 'abrac'
//this is so comprisons won't take so long
return string.substring(0,5)+string.charAt(string.length()-1);
}
public static void main(String args[]) throws Exception
{
BurrowsWheelerTransformEncoder encoder = new BurrowsWheelerTransformEncoder();
BufferedReader input = new BufferedReader(new FileReader("src/compressionalgorithm/texts/manifesto.txt"));
String text = "";
//the row in the sorted array where the original text can be found
int originalRow = 0;
//system time when program began
long startTime = System.nanoTime();
//get text from file
while(input.ready())
{
text += input.readLine();
}
//create a new array to hold all transformations
String[] textArray = new String[text.length()];
int length = text.length();
//get individual transformations and put in array
for(int i = 0; i < text.length(); i++)
{
textArray[i] = encoder.originalSuffix(i,text);
//for debugging large text files, prints progress after every 10k'th
//transformation
if(i%10000==0)
System.out.println(i+"/"+length);
}
//uses java's internal methods to sort the array, presumably
//the most efficient way to do the sort (for now)
Arrays.sort(textArray);
String compressedOriginalText = encoder.compressedSuffix(text);
//print the results
for(int i = 0; i < textArray.length; i++)
{
if(textArray[i].equals(compressedOriginalText))
{
originalRow = i;
}
if(i%100==0)
{
System.out.println();
}
System.out.print(textArray[i].charAt(textArray[i].length()-1));
}
System.out.println("\nThe original transformation of the text was found at row " + originalRow + " of the sorted array.");
System.out.println("Time elapsed: " + (System.nanoTime() - startTime));
}