使用 Lucene 2.4 版本进行查询并高亮显示匹配结果
Lucene 2.4里一些过期方法的解决方案
[url]http://extjs2.iteye.com/blog/268014[/url]
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;
/**
 * Command-line demo for Lucene 2.4: runs a fixed query against an on-disk
 * index and prints, for each hit, the stored "path" field followed by the
 * highlighted fragments of the stored "body" field.
 */
public class NewSearch
{
    /**
     * Searches the index in the "index" directory for the term "oracle" in the
     * "path" and "body" fields and prints the highlighted matches.
     *
     * @param args ignored
     * @throws Exception if the query cannot be parsed or the index cannot be
     *                   opened or read
     */
    public static void main(String[] args) throws Exception
    {
        String indexDir = "index";
        String queryString = "oracle";
        String[] fields = {"path", "body"};
        // One Occur flag per field: a hit in either field is enough.
        BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
        // Shared by query parsing and by the highlighter's fallback tokenization
        // so both see the text tokenized the same way.
        StandardAnalyzer analyzer = new StandardAnalyzer();

        // Parse before opening the index so a bad query never leaves a searcher open.
        Query query = MultiFieldQueryParser.parse(queryString, fields, clauses, analyzer);

        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<strong><font color='red'>", "</font></strong>");
        Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
        highlighter.setTextFragmenter(new SimpleFragmenter(60));
        int maxNumFragmentsRequired = 10;
        String fragmentSeparator = "";

        IndexSearcher isearcher = new IndexSearcher(indexDir);
        try
        {
            // TopDocCollector replaces the deprecated Hits API; it gathers the top 10 results.
            TopDocCollector collector = new TopDocCollector(10);
            isearcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            for (int i = 0; i < hits.length; i++)
            {
                int docId = hits[i].doc; // Lucene-internal document number
                Document doc = isearcher.doc(docId);
                System.out.println("所在文件路径:"+doc.get("path"));

                String body = doc.get("body");
                String result = "";
                // The body may not be a stored field; there is nothing to highlight then.
                if (body != null)
                {
                    // Uses the stored term position vector when one exists and
                    // otherwise re-analyzes the stored text, instead of blindly
                    // casting the term vector to TermPositionVector.
                    TokenStream tokenstream = TokenSources.getAnyTokenStream(
                            isearcher.getIndexReader(), docId, "body", analyzer);
                    result = highlighter.getBestFragments(tokenstream, body,
                            maxNumFragmentsRequired, fragmentSeparator);
                }
                System.out.println("内容"+result);
            }
        }
        finally
        {
            // Always release the index files, even when searching or highlighting fails.
            isearcher.close();
        }
    }
}
Lucene 2.4里一些过期方法的解决方案
[url]http://extjs2.iteye.com/blog/268014[/url]