我正在 Solr 中使用 WordDelimiterFilter 进行一些测试,但它不会保留我传递给它的受保护单词列表。请您检查代码和输出示例并建议哪个部分丢失或使用不当?
运行此代码:
private static Analyzer getWordDelimiterAnalyzer() {
return new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream stream = new StandardTokenizer(Version.LUCENE_32, reader);
WordDelimiterFilterFactory wordDelimiterFilterFactory = new WordDelimiterFilterFactory();
HashMap<String, String> args = new HashMap<String, String>();
args.put("generateWordParts", "1");
args.put("generateNumberParts", "1");
args.put("catenateWords", "1");
args.put("catenateNumbers", "1");
args.put("catenateAll", "0");
args.put("luceneMatchVersion", Version.LUCENE_32.name());
args.put("language", "English");
args.put("protected", "protected.txt");
wordDelimiterFilterFactory.init(args);
ResourceLoader loader = new SolrResourceLoader(null, null);
wordDelimiterFilterFactory.inform(loader);
/*List<String> protectedWords = new ArrayList<String>();
protectedWords.add("good bye");
protectedWords.add("hello world");
wordDelimiterFilterFactory.inform(new LinesMockSolrResourceLoader(protectedWords));
*/
return wordDelimiterFilterFactory.create(stream);
}
};
}