// Lucene 5 实现搜索引擎功能：高亮代码
packagesearch.searchengine.cs.swjtu;importjava.io.IOException;importjava.nio.file.Path;importjava.nio.file.Paths;importjava.util.ArrayList;importjava.util.List;importorg.apache.lucene.analysis.Analyzer;importorg.apache.lucene.analysis.TokenStream;importorg.apache.lucene.analysis.standard.StandardAnalyzer;importorg.apache.lucene.document.Document;importorg.apache.lucene.index.DirectoryReader;importorg.apache.lucene.index.IndexReader;importorg.apache.lucene.queryparser.classic.ParseException;importorg.apache.lucene.queryparser.classic.QueryParser;importorg.apache.lucene.search.IndexSearcher;importorg.apache.lucene.search.ScoreDoc;importorg.apache.lucene.search.TopScoreDocCollector;importorg.apache.lucene.search.highlight.Formatter;importorg.apache.lucene.search.highlight.Fragmenter;importorg.apache.lucene.search.highlight.Highlighter;importorg.apache.lucene.search.highlight.InvalidTokenOffsetsException;importorg.apache.lucene.search.highlight.QueryScorer;importorg.apache.lucene.search.highlight.SimpleFragmenter;importorg.apache.lucene.search.highlight.SimpleHTMLFormatter;importorg.apache.lucene.search.highlight.SimpleSpanFragmenter;importorg.apache.lucene.search.highlight.TokenSources;importorg.apache.lucene.store.Directory;importorg.apache.lucene.store.FSDirectory;public classQuery {/**
* 索引后,进行查询,将查询结果封装为QueryBean对象,加入session,为JSP提供页面结果
*
**/
private static String lighterStr(org.apache.lucene.search.Query query,Analyzer a,String fieldName,String txt) throwsIOException, InvalidTokenOffsetsException {
String str= null;//设定放回结果
QueryScorer queryScorer = new QueryScorer(query);//如果有需要,可以传入评分//设置高亮标签
Formatter formatter = new SimpleHTMLFormatter("", "");//高亮分析器
Highlighter hl = newHighlighter(formatter, queryScorer);
Fragmenter fragmenter= newSimpleSpanFragmenter(queryScorer);
hl.setTextFragmenter(fragmenter);//获取返回结果
str =hl.getBestFragment(a, fieldName,txt);if(str == null){returntxt;
}returnstr;
}public static List query(String queryString,String indexUrl) throwsIOException, ParseException, InvalidTokenOffsetsException {/**/ /* 这里放索引文件的位置*/Path path=Paths.get(indexUrl);
Directory indexDir=FSDirectory.open(path);
org.apache.lucene.search.Query query= null;
IndexReader reader=DirectoryReader.open(indexDir);
IndexSearcher searcher= newIndexSearcher(reader);
TopScoreDocCollector collector= TopScoreDocCollector.create(10);
Analyzer analyzer= newStandardAnalyzer();
QueryScorer queryScorer= new QueryScorer(query);//如果有需要,可以传入评分//设置高亮标签
Formatter formatter = new SimpleHTMLFormatter("", "");//高亮分析器
Highlighter hl = newHighlighter(formatter, queryScorer);
Fragmenter fragmenter= newSimpleSpanFragmenter(queryScorer);
hl.setTextFragmenter(fragmenter);try{
QueryParser qp= new QueryParser( "content", analyzer);
query=qp.parse(queryString);
}catch(ParseException e) {
}
searcher.search(query, collector);
ScoreDoc[] hits=collector.topDocs().scoreDocs;
List rstList = new ArrayList();//结果列表(已高亮处理)
System.out.println("Found " + hits.length + " hits.");for(int i=0;i
Document d=searcher.doc(docId);
String rst= "";
String title= d.get("title");
String content= d.get("content");
title= lighterStr(query, analyzer, "title", title);
content= lighterStr(query, analyzer, "content", content);
rst= d.get("url")+"\t"+title+"\t"+content;
rstList.add(rst);//System.out.println((i + 1) +". " + d.get("url") +" "+title+"\t"+content);
System.out.println((i + 1) +". " +rst);
}returnrstList;
}
}