关于lucene 分页

最新推荐文章于 2021-04-25 18:06:49 发布

jimmycap

最新推荐文章于 2021-04-25 18:06:49 发布

阅读量1.2k

点赞数

分类专栏： JAVA类专题文章标签： lucene string exception query file search

本文链接：https://blog.csdn.net/jimmycap/article/details/5220675

版权

JAVA类专题专栏收录该内容

2 篇文章 0 订阅

订阅专栏

最近在做一个搜索引擎,本来想在网上找找看看大家是如何分页,找了很久没找到啥好点子.于是自己写了一个...

具体思路是:先默认只取前100条搜索结果;当这100条结果不足以覆盖所请求的页时,再继续查找更多记录.

package com.yourcompany.struts.action;

import java.io.File;
import java.io.StringReader;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.mira.lucene.analysis.IK_CAnalyzer;

import com.cyberwisdow.pojo.Page;

/**
 * Runs paged full-text queries against the Lucene index stored under the
 * configured {@code IndexFile} directory and returns highlighted results.
 *
 * <p>Paging strategy: a first pass fetches only the top
 * {@link #INITIAL_HIT_LIMIT} hits. When the requested page reaches beyond
 * that window, the query is re-run asking for just enough hits to cover the
 * page.
 *
 * <p>The summary text of the most recent successful search is stored in an
 * instance field and exposed through {@link #getTimeSpan()}.
 */
public class ClientQuery extends BaseFile {
    /** Number of hits fetched by the first search pass. */
    private static final int INITIAL_HIT_LIMIT = 100;

    /** Maximum length of a URL before it is shortened for display. */
    private static final int MAX_URL_LENGTH = 60;

    private final Analyzer analyzer = new IK_CAnalyzer();

    private String timeSpan;

    /**
     * Returns the summary line (hit count and elapsed time) written by the
     * last search that completed without an exception.
     *
     * @return the summary text, or {@code null} if no search has completed yet
     */
    public String getTimeSpan() {
        return this.timeSpan;
    }

    /**
     * Searches the {@code text} and {@code title} fields for {@code key} and
     * returns one page of results.
     *
     * @param key       the query string, parsed with {@link MultiFieldQueryParser}
     * @param pageSize  number of results per page; values {@code <= 0} yield an empty page
     * @param pageIndex 1-based page number; values below 1 are treated as the first page
     * @param refCount  out-parameter receiving the total hit count; may be {@code null}
     * @return the results of the requested page, never {@code null}; empty when
     *         nothing matches, the page is out of range, or the search fails
     */
    public List<Page> search(String key, int pageSize, int pageIndex,
            RefObject<Integer> refCount) {
        long start = System.currentTimeMillis();
        List<Page> list = new ArrayList<Page>();
        if (key == null || key.trim().length() == 0) {
            // Nothing to parse; the query parser would only throw.
            return list;
        }

        Directory dir = null;
        Searcher s = null;
        try {
            String path = getText("IndexFile") + File.separator + "search";
            File file = new File(path);
            // new File(...) is never null, so test for existence instead.
            if (!file.exists()) {
                System.out.println("Index File not exist");
                return list;
            }
            dir = FSDirectory.open(file);
            s = new IndexSearcher(dir, true);

            String[] fields = { "text", "title" };
            MultiFieldQueryParser mq = new MultiFieldQueryParser(
                    Version.LUCENE_29, fields, this.analyzer);
            Query query = mq.parse(key);
            SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(
                    "<font color=red>", "</font>");
            Highlighter highlighter = new Highlighter(simpleHTMLFormatter,
                    new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(200));

            TopDocs topDocs = s.search(query, INITIAL_HIT_LIMIT);
            int totalHits = topDocs.totalHits;
            if (refCount != null) {
                refCount.refData = totalHits;
            }

            // Half-open window [firstWanted, endWanted) of hit positions for
            // this page, computed in long so large page numbers cannot overflow.
            long firstWanted = Math.max(((long) pageIndex - 1) * pageSize, 0L);
            long endWanted = Math.min(
                    Math.max((long) pageIndex * pageSize, 0L), (long) totalHits);

            if (firstWanted < endWanted) {
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                if (endWanted > scoreDocs.length) {
                    // The first pass does not cover the page: fetch exactly as
                    // many hits as the page needs rather than every hit.
                    topDocs = s.search(query, (int) endWanted);
                    scoreDocs = topDocs.scoreDocs;
                }
                int end = (int) Math.min(endWanted, (long) scoreDocs.length);
                for (int i = (int) firstWanted; i < end; i++) {
                    Document doc = s.doc(scoreDocs[i].doc);
                    list.add(toPage(doc, highlighter));
                }
            }

            DecimalFormat f = new DecimalFormat("#.#####");
            long elapsedMillis = System.currentTimeMillis() - start;
            NumberFormat fot = NumberFormat.getInstance();
            this.timeSpan = "找到相关网页约:" + fot.format(totalHits) + "篇,用时"
                    + f.format(elapsedMillis / 1000.00000) + "秒";

        } catch (Exception ex) {
            System.out.println("Search:" + ex.getMessage());
            ex.printStackTrace();
        } finally {
            // Close the searcher before the directory it reads from, and keep
            // the two closes independent so one failure cannot leak the other.
            if (s != null) {
                try {
                    s.close();
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
            }
            if (dir != null) {
                try {
                    dir.close();
                } catch (Exception ex) {
                    ex.printStackTrace();
                }
            }
        }
        return list;
    }

    /** Copies the stored fields of one hit into a {@link Page}, highlighting text and title. */
    private Page toPage(Document doc, Highlighter highlighter) {
        String title = trimToEmpty(doc.get("title"));
        String text = trimToEmpty(doc.get("text"));
        Page p = new Page();
        p.setId(Long.parseLong(doc.get("docId")));
        p.setGuid(doc.get("guid"));
        p.setCreateTime(doc.get("createDate"));
        if (!isNullOrEmpty(text)) {
            p.setText(getHighlight("text", text, highlighter));
        }
        if (!isNullOrEmpty(title)) {
            p.setTitle(getHighlight("title", title, highlighter));
        }
        String url = doc.get("url");
        if (!isNullOrEmpty(url)) {
            p.setUrl(url.length() > MAX_URL_LENGTH
                    ? url.substring(0, MAX_URL_LENGTH - 3) + "..."
                    : url);
        }
        return p;
    }

    /** Trims a stored field value, mapping a missing ({@code null}) field to "". */
    private static String trimToEmpty(String value) {
        return value == null ? "" : value.trim();
    }

    /**
     * Returns the best highlighted fragment of {@code text} for the given
     * field, falling back to the plain text when no fragment is produced or
     * highlighting fails.
     *
     * @param filedName   name of the field the text belongs to
     * @param text        the stored field text
     * @param highlighter highlighter bound to the current query
     * @return the highlighted fragment, the plain text, or "" for blank input
     */
    private String getHighlight(String filedName, String text,
            Highlighter highlighter) {
        if (isNullOrEmpty(text)) {
            return "";
        }
        try {
            TokenStream tokenStream = analyzer.tokenStream(filedName,
                    new StringReader(text));
            String fragment = highlighter.getBestFragment(tokenStream, text);
            return isNullOrEmpty(fragment) ? text : fragment;
        } catch (Exception ex) {
            ex.printStackTrace();
            // Prefer showing unhighlighted text over showing nothing.
            return text;
        }
    }

    /**
     * Tells whether a value should be treated as absent.
     *
     * @param value the value to test
     * @return {@code true} if {@code value} is {@code null}, the empty string,
     *         or the literal string {@code "null"}
     */
    public static boolean isNullOrEmpty(Object value) {
        return ((value == null) || ("".equals(value)) || ("null".equals(value)));
    }
}

jimmycap

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
1
评论
关于lucene 分页

最近在做一个搜索引擎,本来想在网上找找看看大家是如何分页,找了很久没找到啥好点子.于是自己写了一个... 具体思路是,先默认指定前100个搜索结果,当这一100个结果不能满足时,继续查找更多记录. package com.yourcompany.struts.action;import java.io.File;import java.io.StringReader;impor
复制链接

扫一扫