关于 Lucene 分页

最近在做一个搜索引擎,本来想在网上找找,看看大家是如何做分页的,可找了很久也没找到什么好办法,于是自己写了一个。

 

具体思路是:先默认只取前 100 个搜索结果;当这 100 个结果不足以覆盖所请求的页时,再继续查询更多记录。

 

package com.yourcompany.struts.action;

import java.io.File;
import java.io.StringReader;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.mira.lucene.analysis.IK_CAnalyzer;

import com.cyberwisdow.pojo.Page;

public class ClientQuery extends BaseFile {
 private Analyzer analyzer = new IK_CAnalyzer();

 private String timeSpan;

 public String getTimeSpan() {
  return this.timeSpan;
 }

 public List<Page> search(String key, int pageSize, int pageIndex,
   RefObject<Integer> refCount) {
  Long start = System.currentTimeMillis();
  List<Page> list = new ArrayList<Page>();
  Directory dir = null;
  Searcher s = null;
  try {
   TopDocs topDocs = null;
   String path = getText("IndexFile") + File.separator + "search";
   File file = new File(path);
   if (file == null)
    System.out.println("Index File not exist");
   dir = FSDirectory.open(file);
   s = new IndexSearcher(dir, true);

   String[] fields = { "text", "title" };
   MultiFieldQueryParser mq = new MultiFieldQueryParser(
     Version.LUCENE_29, fields, this.analyzer);
   Query query = mq.parse(key);
   SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
     "<font color=red>", "</font>");
   Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
     new QueryScorer(query));
   highlighter.setTextFragmenter(new SimpleFragmenter(200));

   topDocs = s.search(query, 100);
   int iCount = topDocs.totalHits;
   int startIndex = Math.max((pageIndex - 1) * pageSize, 0);
   int endIndex = Math.min(pageIndex * pageSize - 1, iCount - 1);
   refCount.refData = iCount;
   /*******************************************************************
    * 搜索出来,共分多少页
    */
   int allTotal = (iCount / pageSize)
     + (iCount % pageSize > 0 ? 1 : 0);
   ScoreDoc[] scoreDocs = topDocs.scoreDocs;
   int iLength = scoreDocs.length;
   /*******************************************************************
    * 前100行,共分多少页
    */
   int userTotal = (iLength / pageSize)
     + (iLength % pageSize > 0 ? 1 : 0);

   if (iCount >= iLength) {
    /***************************************************************
     * 当前100行结果集,不能满足时,续取出总结果
     */
    if (pageIndex > userTotal && pageIndex <= allTotal) {
     topDocs = s.search(query, iCount);
     scoreDocs = topDocs.scoreDocs;
     iLength = topDocs.scoreDocs.length;
     System.out.println("--------" + topDocs.scoreDocs.length);
    }
   }

   for (int i = startIndex; i < endIndex && i < iLength; i++) {
    Document doc = s.doc(scoreDocs[i].doc);

    String title = doc.get("title").trim();
    String text = doc.get("text").trim();
    Page p = new Page();
    p.setId(Long.parseLong(doc.get("docId")));
    p.setGuid(doc.get("guid"));
    p.setCreateTime(doc.get("createDate"));
    if (!isNullOrEmpty(text))
     p.setText(getHighlight("text", text, highlighter));
    if (!isNullOrEmpty(title))
     p.setTitle(getHighlight("title", title, highlighter));
    String url = doc.get("url");
    if (!isNullOrEmpty(url))
     p.setUrl(url.length() > 60 ? url.substring(0, 57) + "..."
       : url);
    list.add(p);

   }
   DecimalFormat f = new DecimalFormat("#.#####");
   Long end = System.currentTimeMillis();
   Long result = end - start;
   NumberFormat fot = NumberFormat.getInstance();
   this.timeSpan = "找到相关网页约:" + fot.format(iCount) + "篇,用时"
     + f.format(result / 1000.00000) + "秒";

  } catch (Exception ex) {
   System.out.println("Search:" + ex.getMessage());
   ex.printStackTrace();
  } finally {
   try {
    if (dir != null)
     dir.close();
    if (s != null)
     s.close();

   } catch (Exception ex) {
    ex.printStackTrace();
   }
  }
  return list;
 }

 private String getHighlight(String filedName, String text,
   Highlighter highlighter) {
  String str = "";
  try {
   if (!isNullOrEmpty(text)) {
    TokenStream tokenStream = analyzer.tokenStream(filedName,
      new StringReader(text));
    str = highlighter.getBestFragment(tokenStream, text);
    if (isNullOrEmpty(str))
     str = text;
   }

  } catch (Exception ex) {
   ex.printStackTrace();
  }

  return str;
 }

 public static boolean isNullOrEmpty(Object value) {
  return ((value == null) || ("".equals(value)) || ("null".equals(value)));
 }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值