此博文针对的是 Lucene 5.3.0 版本。若您使用的 Lucene 版本为 3.X,请移步这里: http://write.blog.csdn.net/postedit/78291868 (该文只提取关键词,未包含同义词检索)。
本篇文章包含两个功能
1、精确提取自定义关键词
2、同义词检索与提取
废话不多说,直接撸代码
定义同义词分词类如下:
package com.daelly.sample.lucene.analyzer.synonyms;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.FilesystemResourceLoader;
import org.wltea.analyzer.lucene.IKTokenizer;
public class SynonymsAnalyzer extends Analyzer {
private final String synonymsPath;
public SynonymsAnalyzer(String synonymsPath) {
if(synonymsPath==null || synonymsPath.isEmpty()) {
throw new IllegalArgumentException("synonymsPath must be provided!");
}
this.synonymsPath = synonymsPath;
}
/**
 * Builds the analysis chain: an {@link IKTokenizer}, optionally wrapped by
 * the synonym filter produced by {@link #getSynonymFilterFactory()}.
 *
 * <p>If the synonym factory cannot be obtained because of an
 * {@link IOException}, the stack trace is printed and the chain falls back
 * to the bare tokenizer, i.e. analysis continues without synonym expansion.
 *
 * @param fieldName name of the field being analyzed; not used by this
 *                  implementation
 * @return components made of the tokenizer alone, or of the tokenizer plus
 *         the synonym token stream when the factory is available
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    SynonymFilterFactory synonymFactory;
    try {
        synonymFactory = getSynonymFilterFactory();
    } catch (IOException e) {
        // Best-effort: report the failure and continue without synonyms.
        e.printStackTrace();
        synonymFactory = null;
    }

    // NOTE(review): the 'true' flag presumably selects IK's smart segmentation mode — confirm against the IK Analyzer docs.
    final Tokenizer source = new IKTokenizer(true);

    if (synonymFactory == null) {
        return new TokenStreamComponents(source);
    }
    final TokenStream withSynonyms = synonymFactory.create(source);
    return new TokenStreamComponents(source, withSynonyms);
}
private SynonymFilterFactory getSynonymFilterFactory() throws IOException {
if(synonymsPath.contains("classpath:")) {
String path = synonymsPath.replace("classpath:", "");
Map args = new HashMap<String,String>();
args.put("synonyms", path);
SynonymFilterFactory factory = new SynonymFil