本文只涉及lucene的应用,关于其原理等暂不涉及,有时间再单独写一篇。
用常用的文章类作为例子,实体类代码如下:
/**
 * Article entity indexed by Lucene. Lombok's {@code @Data} generates the
 * getters/setters/equals/hashCode used elsewhere in this post.
 */
@Data
public class Article implements Serializable{
// A Serializable class should pin its serialVersionUID explicitly; otherwise
// any field change breaks deserialization of previously serialized instances.
private static final long serialVersionUID = 1L;
private Long id;
private String title;
private String describe;
private String content;
private Integer status;
// NOTE(review): legacy java.util.Date — java.time.LocalDateTime would be the
// modern choice, but changing the field type would break existing callers.
private Date createtime;
}
maven依赖引入:
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.3.1</version>
</dependency>
<!--一般分词器,适用于英文分词-->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.3.1</version>
</dependency>
<!--中文分词器-->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>5.3.1</version>
</dependency>
<!--对分词索引查询解析-->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.3.1</version>
</dependency>
<!--检索关键字高亮显示-->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>5.3.1</version>
</dependency>
创建索引过程如图:
生成document文档:
/**
 * Builds a Lucene {@link Document} from an article. All fields are stored
 * ({@code Field.Store.YES}) so they can be read back from search hits.
 *
 * @param article source entity; may be {@code null}
 * @return the populated document, or {@code null} when {@code article} is {@code null}
 */
public static Document createDocument(Article article){
if(article == null){
return null;
}
Document doc = new Document();
// Lucene Field constructors throw on a null value, and unboxing a null Long
// id would NPE — substitute safe defaults for missing properties.
long idValue = article.getId() == null ? 0L : article.getId();
Field id = new LongField("id", idValue, Field.Store.YES);
Field title = new TextField("title", article.getTitle() == null ? "" : article.getTitle(), Field.Store.YES);
Field describe = new TextField("describe", article.getDescribe() == null ? "" : article.getDescribe(), Field.Store.YES);
Field content = new TextField("content", article.getContent() == null ? "" : article.getContent(), Field.Store.YES);
doc.add(title);
doc.add(describe);
doc.add(content);
doc.add(id);
return doc;
}
创建索引(为简洁起见,打开目录、配置分词器、写入与关闭都放在同一个方法中):
/**
 * Adds one article to the index at {@code PATH} using the smart Chinese
 * analyzer. No-op when {@code article} is {@code null}.
 *
 * @param article the article to index
 */
public static void createIndex(Article article){
if(article == null ){
return;
}
Document doc = createDocument(article);
// try-with-resources guarantees both the writer and the directory are closed
// even if addDocument throws — the original leaked both on any exception.
try (Directory directory = FSDirectory.open(new File(PATH).toPath())) {
// Chinese-aware analyzer; must match the one used at query time.
Analyzer analyzer = new SmartChineseAnalyzer();
// OpenMode.APPEND: incremental indexing; OpenMode.CREATE: overwrite the index.
IndexWriterConfig config = new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.APPEND);
try (IndexWriter writer = new IndexWriter(directory, config)) {
writer.addDocument(doc);
}
} catch (Exception e) {
e.printStackTrace();
}
}
查询:
/** * 查询 * @param source */ public static void search(String source){ try { Directory directory = FSDirectory.open(Paths.get(PATH)); IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new SmartChineseAnalyzer(); //从多个fields中查找 QueryParser parser = new MultiFieldQueryParser(FIELDS,analyzer); Query query = parser.parse(source); //高亮显示 SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color=red>","</font></b>"); QueryScorer scorer = new QueryScorer(query);//计算得分 Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);//根据得分计算片段 Highlighter highlighter = new Highlighter(formatter,scorer); highlighter.setTextFragmenter(fragmenter);//设置要显示的片段 TopDocs docs = searcher.search(query,10); for (ScoreDoc score:docs.scoreDocs){ Document document = searcher.doc(score.doc); String title = document.get("title"); System.out.println(document.get("id")); //显示高亮部分 if(title != null){ TokenStream tokenStream = analyzer.tokenStream("title",new StringReader(title)); String htitle = highlighter.getBestFragment(tokenStream,title); System.out.println(htitle); } } }catch (Exception e){ } }