我编写了一个程序来搜索段落中的给定短语,并在该段落中用花括号将短语括起来。我已经使用 BoyerMoore 的算法进行搜索。同时我还需要提高程序的性能。虽然我得到了所需的输出,但性能是灾难性的。
这是代码:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
public class BoyerMoore {
static class Pair {
public int start, end;
Pair(int start, int end) {
this.start = start;
this.end = end;
}
public int weight() {
return end - start;
}
public boolean contains(int point) {
return start <= point && point <= end;
}
public int returnStart() {
return start;
}
}
static class Group {
public List<Pair> pairs = new ArrayList<Pair>();
public Pair maxWeight;
Group(Pair start) {
add(start);
}
Group(List<Pair> pairs) {
for (Pair pair : pairs) {
add(pair);
}
}
public boolean contains(Pair pair) {
for (Pair my : pairs) {
if (my.contains(pair.start) || my.contains(pair.end))
return true;
}
return false;
}
public void add(Pair pair) {
pairs.add(pair);
if (maxWeight == null || maxWeight.weight() < pair.weight())
maxWeight = pair;
}
}
public static List<Integer> match(String pattern, String text) {
List<Integer> matches = new ArrayList<Integer>();
int m = text.length();
int n = pattern.length();
Map<Character, Integer> rightMostIndexes = preprocessForBadCharacterShift(pattern);
int alignedAt = 0;
while (alignedAt + (n - 1) < m) {
for (int indexInPattern = n - 1; indexInPattern >= 0; indexInPattern--) {
int indexInText = alignedAt + indexInPattern;
char x = text.charAt(indexInText);
char y = pattern.charAt(indexInPattern);
if (indexInText >= m)
break;
if (x != y) {
Integer r = rightMostIndexes.get(x);
if (r == null) {
alignedAt = indexInText + 1;
} else {
int shift = indexInText - (alignedAt + r);
alignedAt += shift > 0 ? shift : 1;
}
break;
} else if (indexInPattern == 0) {
matches.add(alignedAt);
alignedAt++;
}
}
}
return matches;
}
private static Map<Character, Integer> preprocessForBadCharacterShift(
String pattern) {
Map<Character, Integer> map = new HashMap<Character, Integer>();
for (int i = pattern.length() - 1; i >= 0; i--) {
char c = pattern.charAt(i);
if (!map.containsKey(c))
map.put(c, i);
}
return map;
}
public static void main(String[] args) throws IOException {
BufferedReader input = new BufferedReader(new InputStreamReader(
System.in));
ArrayList<String> ListOfAllPhrase = new ArrayList<String>();
List<Pair> pairs = new ArrayList<Pair>();
List<Group> groups = new ArrayList<Group>();
ListOfAllPhrase.add("protein");
ListOfAllPhrase.add("protein kinase");
ListOfAllPhrase.add("protein kinase A anchor protein");
ListOfAllPhrase.add("protein kinase A anchor proteins");
ListOfAllPhrase.add("protein kinase A anchor protein activity");
ListOfAllPhrase.add("IL-6");
ListOfAllPhrase.add("SOX5");
ListOfAllPhrase.add("NOX5");
System.out.println("Input a sentence: ");
String line = input.readLine();
char[] lineInChar = line.toCharArray();
long startTime = System.currentTimeMillis();
for (int i = 0; i < ListOfAllPhrase.size(); i++) {
// offset.add((ListOfAllPhrase.get(i)).length());
List<Integer> matches = match(ListOfAllPhrase.get(i).toLowerCase(),
line.toLowerCase());
for (Integer integer : matches) {
pairs.add(new Pair(integer, (ListOfAllPhrase.get(i)).length()
+ integer));
}
}
System.out.println("Total time taken: "
+ (System.currentTimeMillis() - startTime));
for (Pair pair : pairs) {
List<Group> intersects = new ArrayList<Group>();
for (Group group : groups) {
if (group.contains(pair)) {
intersects.add(group);
}
}
if (intersects.isEmpty()) {
groups.add(new Group(pair));
} else {
List<Pair> intervals = new ArrayList<Pair>();
intervals.add(pair);
for (Group intersect : intersects) {
intervals.addAll(intersect.pairs);
}
groups.removeAll(intersects);
groups.add(new Group(intervals));
}
}
StringBuilder newBuilder = new StringBuilder();
int flag = 1;
System.out.println(lineInChar.length);
for (int a = 0; a <= lineInChar.length; a++) {
for (Group group : groups) {
if (a == group.maxWeight.start) {
newBuilder.append("{");
flag = 1;
break;
}
if (a == group.maxWeight.end && a == lineInChar.length) {
newBuilder.append("}");
flag = 0;
break;
}
if (a == lineInChar.length && a == group.maxWeight.end + 1) {
newBuilder.append("}");
flag = 0;
break;
}
if (a == group.maxWeight.end) {
newBuilder.append("}");
flag = 1;
break;
}
}
if (flag == 0)
continue;
newBuilder.append(lineInChar[a]);
flag = 1;
}
System.out.println("Final output: " + newBuilder);
}
}
我可以实施或做些什么来提高我的程序的性能?我应该切换到另一种字符串搜索算法吗?
如果有人可以帮我解决这个问题?