使用Lucene 2.4.0 建立索引+搜索结果高亮显示(二)

使用 Lucene 2.4 版本进行查询并高亮显示搜索结果

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;

/**
 * Command-line demo that searches a Lucene 2.4 index and prints highlighted
 * fragments of the "body" field for each hit.
 */
public class NewSearch
{
    /**
     * Opens the index in the "index" directory, searches the "path" and "body"
     * fields for the term "oracle", and prints the path and the highlighted
     * body fragments of up to 10 hits to standard output.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened or read, or the query
     *                   cannot be parsed
     */
    public static void main(String[] args) throws Exception
    {
        String indexDir = "index";
        String queryString = "oracle";
        String[] fields = {"path", "body"};
        // A document matches if either field matches.
        BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
        // One analyzer instance serves both query parsing and the
        // re-analysis fallback used when building token streams.
        StandardAnalyzer analyzer = new StandardAnalyzer();

        IndexSearcher isearcher = new IndexSearcher(indexDir);
        try
        {
            Query query = MultiFieldQueryParser.parse(queryString, fields, clauses, analyzer);

            // Collect at most the 10 top hits.
            TopDocCollector collector = new TopDocCollector(10);
            isearcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            SimpleHTMLFormatter formatter =
                    new SimpleHTMLFormatter("<strong><font color='red'>", "</font></strong>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(60));
            int maxNumFragmentsRequired = 10;
            String fragmentSeparator = "";

            for (int i = 0; i < hits.length; i++)
            {
                // Internal Lucene document number of this hit.
                int docId = hits[i].doc;
                Document doc = isearcher.doc(docId);
                System.out.println("所在文件路径:" + doc.get("path"));

                String body = doc.get("body");
                String result = "";
                // "body" is null when the field was not stored; there is
                // then no text to highlight, so print an empty fragment.
                if (body != null)
                {
                    // Uses the stored term position vector when present and
                    // otherwise re-analyses the stored text, so a missing or
                    // position-less term vector no longer causes a
                    // ClassCastException / null token stream.
                    TokenStream tokenStream = TokenSources.getAnyTokenStream(
                            isearcher.getIndexReader(), docId, "body", analyzer);
                    result = highlighter.getBestFragments(
                            tokenStream, body, maxNumFragmentsRequired, fragmentSeparator);
                }
                System.out.println("内容" + result);
            }
        }
        finally
        {
            // Release the index files even when searching or highlighting fails.
            isearcher.close();
        }
    }
}



Lucene 2.4 中一些已弃用(deprecated)方法的替代方案:
http://extjs2.iteye.com/blog/268014
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值