我需要修改这个脚本,以便可以检查两个以上的单词,而且我对 Java 的了解太有限,无法自己进行更改。该脚本是 OpenOffice (LanguageTool) 的开源语法检查器的一部分,脚本的目的是用其他词替换某些词。
要检查的单词文件名为“coherency.txt”,格式如下: WrongWord1=CorrectWord1 WrongWord2=CorrectWord2
当我输入: WrongWord1 时,它会被脚本标记并告诉我应该改用 CorrectWord1。
但我需要能够拥有三个或更多单词,如下所示: WrongWord1=WrongWord2=CorrectWord1 WrongWord3=WrongWord4=WrongWord5=CorrectWord2 WrongWord6=CorrectWord3
因此,当我输入 WrongWord3 时,它会被标记,并且脚本会告诉我应该使用 CorrectWord2;当我输入 WrongWord2 时,它同样会被标记,并且脚本会告诉我应该使用 CorrectWord1。
如果您可以提供帮助,我可以在http://www.sbbic.org/lang/en-us/volunteer/上提供指向您网页的链接
如果您能就如何修改此代码、使其支持对两个以上的单词进行比较和替换提供任何帮助,我们将不胜感激!谢谢,内森
/* LanguageTool, a natural language style checker
* Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package de.danielnaber.languagetool.rules;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;
import de.danielnaber.languagetool.AnalyzedSentence;
import de.danielnaber.languagetool.AnalyzedTokenReadings;
import de.danielnaber.languagetool.JLanguageTool;
import de.danielnaber.languagetool.tools.StringTools;
/**
 * A Khmer rule that matches words or phrases which should not be used and suggests
 * correct ones instead. Loads the relevant words from the file named by
 * {@link #getFileName()} (e.g. <code>rules/km/coherency.txt</code>, where km is a
 * code of the language).
 *
 * <p>Each non-empty, non-comment line of the data file has the form
 * <code>WrongWord1=WrongWord2=...=CorrectWord</code>: the last
 * <code>=</code>-separated part is the suggestion, and every part before it is a
 * wrong word that gets flagged. The classic two-part form
 * <code>WrongWord=CorrectWord</code> is the simplest case of this. Lines starting
 * with <code>#</code> are comments.
 *
 * @author Andriy Rysin
 */
public abstract class KhmerWordCoherencyRule extends KhmerRule {

  private static final String FILE_ENCODING = "utf-8";

  /** Maps each wrong word, exactly as written in the data file, to its replacement. */
  private final Map<String, String> wrongWords;

  /**
   * @return the path of the word list, resolved relative to the rules directory.
   */
  public abstract String getFileName();

  /**
   * @return the character encoding used to read the word list.
   */
  public String getEncoding() {
    return FILE_ENCODING;
  }

  /**
   * Indicates if the rule is case-sensitive. Default value is <code>false</code>.
   * @return true if the rule is case-sensitive, false otherwise.
   */
  public boolean isCaseSensitive() {
    return false;
  }

  /**
   * @return the locale used for case conversion when {@link #isCaseSensitive()} is set to <code>false</code>.
   */
  public Locale getLocale() {
    return Locale.getDefault();
  }

  /**
   * Creates the rule and loads its word list from {@link #getFileName()}.
   *
   * @param messages resource bundle providing the category name; when
   *        <code>null</code>, no category is set
   * @throws IOException if the word list cannot be read or contains a malformed line
   */
  public KhmerWordCoherencyRule(final ResourceBundle messages) throws IOException {
    if (messages != null) {
      super.setCategory(new Category(messages.getString("category_misc")));
    }
    wrongWords = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(getFileName()));
  }

  public String getId() {
    return "KM_WORD_COHERENCY";
  }

  public String getDescription() {
    return "Checks for wrong words/phrases";
  }

  /**
   * @return the text placed between the offending word and its replacement in the match message.
   */
  public String getSuggestion() {
    return " is not valid, use ";
  }

  public String getShort() {
    return "Wrong word";
  }

  /**
   * Flags every token of the sentence that is listed as a wrong word and
   * attaches the configured replacement as the suggestion.
   *
   * @param text the analyzed sentence to check
   * @return one match per offending token, in sentence order
   */
  public final RuleMatch[] match(final AnalyzedSentence text) {
    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
    final boolean caseSensitive = isCaseSensitive();
    // Index 0 is skipped; presumably it is the sentence-start marker - TODO confirm.
    for (int i = 1; i < tokens.length; i++) {
      final String token = tokens[i].getToken();
      final String key = caseSensitive ? token : token.toLowerCase(getLocale());
      final String replacement = wrongWords.get(key);
      if (replacement == null) {
        continue;
      }
      final String msg = token + getSuggestion() + replacement;
      final int pos = tokens[i].getStartPos();
      final RuleMatch potentialRuleMatch = new RuleMatch(this, pos, pos
          + token.length(), msg, getShort());
      if (!caseSensitive && StringTools.startsWithUppercase(token)) {
        // Keep the capitalization of the word being replaced.
        potentialRuleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(replacement));
      } else {
        potentialRuleMatch.setSuggestedReplacement(replacement);
      }
      ruleMatches.add(potentialRuleMatch);
    }
    return toRuleMatchArray(ruleMatches);
  }

  /**
   * Reads the word list. Blank lines and lines starting with <code>#</code> are
   * ignored. Every other line must contain at least two <code>=</code>-separated
   * parts; the last part is the correct word and each preceding part is mapped
   * to it. If a wrong word occurs more than once, the last occurrence wins.
   *
   * @param file stream to read; it is closed before this method returns
   * @return map from wrong word to correct word
   * @throws IOException on read errors or when a line has fewer than two parts
   */
  private Map<String, String> loadWords(final InputStream file) throws IOException {
    final Map<String, String> map = new HashMap<String, String>();
    InputStreamReader isr = null;
    BufferedReader br = null;
    try {
      isr = new InputStreamReader(file, getEncoding());
      br = new BufferedReader(isr);
      String line;
      while ((line = br.readLine()) != null) {
        line = line.trim();
        if (line.length() < 1) {
          continue;
        }
        if (line.charAt(0) == '#') { // ignore comments
          continue;
        }
        final String[] parts = line.split("=");
        if (parts.length < 2) {
          throw new IOException("Format error in file "
              + JLanguageTool.getDataBroker().getFromRulesDirAsUrl(getFileName()) + ", line: " + line);
        }
        // The last part is the suggestion; everything before it is a wrong word.
        final String correctWord = parts[parts.length - 1];
        for (int i = 0; i < parts.length - 1; i++) {
          map.put(parts[i], correctWord);
        }
      }
    } finally {
      // Close the outermost wrapper that was created (closing a reader also
      // closes what it wraps); fall back to the raw stream so it is not leaked
      // when the reader could not be constructed.
      if (br != null) {
        br.close();
      } else if (isr != null) {
        isr.close();
      } else if (file != null) {
        file.close();
      }
    }
    return map;
  }

  /**
   * Intentionally empty: this rule does nothing on reset.
   */
  public void reset() {
  }
}