在这里就不解释 Lucene 应用中的基本概念了，直接切入主题。
一、添加索引过程
/**
 * Builds a Lucene document from a title and a body text and queues it in
 * {@code docmentList}. Nothing is written to the index by this method.
 *
 * <p>The body is added twice under the same field name {@code "content"}:
 * once analyzed but not stored, and once as a stored value holding the output
 * of {@code CompressionTools.compressString(content)}.
 *
 * <p>NOTE(review): {@code title} and {@code content} are not checked for
 * {@code null} here; presumably the Field constructor / CompressionTools
 * reject null values - confirm against the Lucene version in use.
 *
 * @param title   text of the "title" field (stored and analyzed)
 * @param content body text of the document
 */
public static void addDocument(String title,String content){
    Document doc = new Document();

    // Title: stored as-is and analyzed, with term vectors carrying positions and offsets.
    Field titleField = new Field("title", title, Field.Store.YES,
            Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

    // Searchable copy of the body: analyzed but not stored.
    Field contentField = new Field("content", content, Field.Store.NO,
            Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);

    // Stored copy of the body: CompressionTools compresses long text (or binary
    // data) so that the stored value takes less space.
    Field content2Field = new Field("content",
            CompressionTools.compressString(content), Field.Store.YES);

    doc.add(titleField);
    doc.add(contentField);
    doc.add(content2Field);
    docmentList.add(doc);
}
/**
 * Writes every document queued in {@code docmentList} to the index located at
 * {@code FilePathUtils.getIndexPath()}, optimizes the index and, on success,
 * replaces {@code docmentList} with a fresh empty list.
 *
 * <p>The writer is opened with {@code create = true}, so the batch replaces
 * whatever index already exists at that path once it is committed.
 *
 * <p>If anything fails before the writer has been closed successfully, the
 * writer is rolled back so that the write lock is released and no partially
 * written batch is committed. The queued documents are kept in that case.
 * {@link IOException}s are printed to standard error and not rethrown.
 */
public static void index(){
    try {
        // Directory that holds the index files.
        FSDirectory indexDir = FSDirectory.open(new File(FilePathUtils.getIndexPath()));
        try {
            // IKAnalyzer: Chinese word segmentation analyzer.
            Analyzer analyzer = new IKAnalyzer();
            IndexWriter indexWriter = new IndexWriter(indexDir, analyzer, true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            boolean committed = false;
            try {
                for (Document doc : docmentList) {
                    indexWriter.addDocument(doc);
                }
                indexWriter.optimize();
                indexWriter.close();
                committed = true;
            } finally {
                if (!committed) {
                    // Closes the writer without committing and releases the write lock.
                    indexWriter.rollback();
                }
            }
            // Only drop the queue once the batch is safely on disk.
            docmentList = new ArrayList<Document>();
        } finally {
            indexDir.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
二、检索过程
/**
 * Searches the "title" field of the index at
 * {@code FilePathUtils.getIndexPath()} for {@code keyword} and prints the
 * total hit count followed by the title and body of the best-scoring hits to
 * standard output.
 *
 * <p>Only the top 5 hits are collected and printed; the printed total still
 * counts every matching document. Matched terms in the title are wrapped in
 * {@code <b><font color='red'>...</font></b>}; when the highlighter produces
 * no fragment the stored title is printed unchanged. The body is read from the
 * stored binary "content" field and decompressed with
 * {@code CompressionTools.decompressString}.
 *
 * <p>The searcher and the directory are closed before the method returns.
 * {@link IOException}s are printed to standard error and not rethrown.
 *
 * @param keyword query text handed to {@code IKQueryParser.parse}
 */
public static void search(String keyword){
    try {
        // Directory that holds the index files.
        FSDirectory indexDir = FSDirectory.open(new File(FilePathUtils.getIndexPath()));
        try {
            // IKAnalyzer: Chinese word segmentation analyzer (used for highlighting).
            Analyzer analyzer = new IKAnalyzer();
            // Read-only searcher over the index.
            IndexSearcher searcher = new IndexSearcher(indexDir,true);
            try {
                // Score with IKSimilarity.
                searcher.setSimilarity(new IKSimilarity());
                // Build the query with IKQueryParser; "title" is the field being searched.
                Query query = IKQueryParser.parse("title", keyword);

                // Markup placed around highlighted terms.
                SimpleHTMLFormatter sHtmlF = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
                Highlighter highlighter = new Highlighter(sHtmlF,new QueryScorer(query));
                // Fragment size (in characters) used when cutting the highlighted text.
                highlighter.setTextFragmenter(new SimpleFragmenter(100));

                // Keep only the 5 best-scoring hits.
                TopScoreDocCollector collector = TopScoreDocCollector.create(5, false);
                searcher.search(query, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;
                int numTotalHits = collector.getTotalHits();
                System.out.println("共检索到"+numTotalHits+"条记录");

                for (int i = 0; i < hits.length; i++) {
                    Document targetDoc = searcher.doc(hits[i].doc);
                    try {
                        String title = targetDoc.get("title");
                        // getBestFragment returns null when no term of the text matches
                        // the query; fall back to the plain title instead of printing "null".
                        String highlighted = highlighter.getBestFragment(analyzer,"title",title);
                        System.out.println(" 标题: " + (highlighted != null ? highlighted : title));

                        Field contentField = targetDoc.getField("content");
                        // The stored body is compressed; decompress it before printing.
                        System.out.println(" 内容: " + CompressionTools.decompressString(contentField.getBinaryValue()));
                    } catch (InvalidTokenOffsetsException e) {
                        e.printStackTrace();
                    } catch (DataFormatException e) {
                        e.printStackTrace();
                    }
                }
            } finally {
                // Releases the index reader opened by the searcher.
                searcher.close();
            }
        } finally {
            indexDir.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}