Lucene 实战

最新推荐文章于 2022-01-24 13:49:04 发布

ld191474639

最新推荐文章于 2022-01-24 13:49:04 发布

阅读量1.5k

点赞数

本文链接：https://blog.csdn.net/ld191474639/article/details/8977878

版权

package timetask;

import java.io.File;
import java.io.PrintStream;
import java.util.Date;
import java.util.TimerTask;
import javabean.Article;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import table.ArticleTable;
import util.Constant;
import util.PubFun;

/**
 * Timer task that rebuilds the full-text search index from scratch.
 *
 * <p>Each run loads the articles via {@code ArticleTable.loadArticlesForSearchIndex()},
 * converts each one with {@code PubFun.getLuceneDoc} and writes them into a new
 * index under {@code Constant.SEARCH_INDEX_PATH} (open mode {@code CREATE}).
 */
public class IndexTask extends TimerTask
{
  /** Checked at the start of every run; nothing in this class ever sets it to false. */
  private static boolean isRunning = true;

  /**
   * Rebuilds the index when the task is enabled. Any failure is printed to
   * standard error and not rethrown.
   */
  @Override
  public void run() {
    if (!isRunning) {
      return;
    }
    try {
      indexF();
    } catch (Exception e) {
      e.printStackTrace();
    }
  }

  /**
   * Recreates the index from the articles returned by the article table.
   *
   * @throws Exception if loading the articles or writing the index fails
   */
  private void indexF() throws Exception
  {
    File indexDir = new File(Constant.SEARCH_INDEX_PATH);
    if (!indexDir.exists()) {
      // mkdirs also creates missing parent directories
      indexDir.mkdirs();
    }

    // Load before opening the writer: if loading fails, no write lock has been taken.
    Article[] articles = ArticleTable.loadArticlesForSearchIndex();
    System.out.println("articles 共有 " + articles.length + " 条");

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), conf);
    long startTime = new Date().getTime();

    boolean committed = false;
    try {
      for (int i = 0; i < articles.length; ++i) {
        indexWriter.addDocument(PubFun.getLuceneDoc(articles[i]));
      }
      indexWriter.forceMerge(1);
      indexWriter.close();
      committed = true;
    } finally {
      if (!committed) {
        // Discard the half-built index and release the write lock instead of
        // committing a partial rebuild over the previous index.
        try {
          indexWriter.rollback();
        } catch (Exception rollbackFailure) {
          // Report it, but let the original exception propagate.
          rollbackFailure.printStackTrace();
        }
      }
    }

    long endTime = new Date().getTime();
    System.out.println("这花费了" +
      (endTime - startTime) +
      " 毫秒来把文档增加到索引里面去!");
  }
}

/**
 * Loads the articles that are fed into the search index.
 *
 * <p>Runs a join of {@code cms_article} and {@code cms_column} through
 * {@code select} and maps each returned row (an {@code Object[]}) onto an
 * {@link Article}. A null note is replaced by the empty string.
 *
 * @return one {@code Article} per returned row, in row order
 * @throws Exception if {@code select} fails
 */
public static Article[] loadArticlesForSearchIndex() throws Exception {
    String sql = "select a.id,title,creatime,note,b.html_path,a.content,a.author from cms_article a,cms_column b where a.col_id=b.id";

    // Second element is presumably the message select() reports on failure -- confirm against select().
    Object[] args = { sql, "ArticleTable===============loadArticlesForSearchIndex has error" };
    List rows = select(args);

    ArrayList articleList = new ArrayList();
    for (Object row : rows) {
      Object[] columns = (Object[]) row;

      Article article = new Article();
      article.setId(((Long) columns[0]).longValue());
      article.setTitle((String) columns[1]);
      article.setCreatime((Date) columns[2]);

      String note = (String) columns[3];
      article.setNote(note != null ? note : "");

      article.setHtmlPath((String) columns[4]);
      article.setContent((String) columns[5]);
      article.setAuthor((String) columns[6]);
      articleList.add(article);
    }

    Article[] result = new Article[articleList.size()];
    articleList.toArray(result);
    return result;
  }

 /**
  * Builds the Lucene document for one article.
  *
  * <p>Fields written: {@code id} (stored, not analyzed); {@code title},
  * {@code note} and {@code content} (stored, analyzed, with term vectors
  * including positions and offsets); {@code author}, {@code url},
  * {@code year}, {@code month} and {@code day} (stored only, not indexed).
  * A null title, note or author is stored as the empty string, because the
  * {@code Field} constructor rejects null values.
  *
  * @param article the article to convert; its creation time must not be null
  * @return a new document holding the fields listed above
  */
 public static Document getLuceneDoc(Article article) {
    String title = (article.getTitle() == null) ? "" : article.getTitle();
    String note = (article.getNote() == null) ? "" : article.getNote();
    String author = (article.getAuthor() == null) ? "" : article.getAuthor();

    Document document = new Document();
    Field fieldId = new Field("id", String.valueOf(article.getId()), Field.Store.YES,
      Field.Index.NOT_ANALYZED);
    Field fieldTitle = new Field("title", title, Field.Store.YES,
      Field.Index.ANALYZED,
      Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field fieldNote = new Field("note", note, Field.Store.YES,
      Field.Index.ANALYZED,
      Field.TermVector.WITH_POSITIONS_OFFSETS);
    Field fieldContent = new Field("content", splitAndFilterString(article.getContent()), Field.Store.YES,
      Field.Index.ANALYZED,
      Field.TermVector.WITH_POSITIONS_OFFSETS);

    Calendar c = Calendar.getInstance();
    c.setTime(article.getCreatime());
    Field fieldYear = new Field("year", String.valueOf(c.get(Calendar.YEAR)),
      Field.Store.YES, Field.Index.NO);
    // Calendar.MONTH is zero-based, hence the + 1
    Field fieldMonth = new Field("month", String.valueOf(c.get(Calendar.MONTH) + 1),
      Field.Store.YES, Field.Index.NO);
    Field fieldDay = new Field("day", String.valueOf(c.get(Calendar.DAY_OF_MONTH)),
      Field.Store.YES, Field.Index.NO);

    // <site url><column html path>/<yyyy-MM-dd>/<article id>.html
    Field fieldUrl = new Field("url", InitServlet.WEB_SITE_URL + article.getHtmlPath() +
      "/" + getDateTime("yyyy-MM-dd", article.getCreatime()) +
      "/" + article.getId() + ".html", Field.Store.YES,
      Field.Index.NO);
    Field fieldAuthor = new Field("author", author,
      Field.Store.YES, Field.Index.NO);

    document.add(fieldId);
    document.add(fieldTitle);
    document.add(fieldNote);
    document.add(fieldAuthor);
    document.add(fieldContent);
    document.add(fieldUrl);
    document.add(fieldYear);
    document.add(fieldMonth);
    document.add(fieldDay);
    return document;
  }

package timetask;

import java.util.Timer;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;

/**
 * Servlet context listener that schedules the periodic {@link IndexTask}
 * when the web application starts and cancels the timer when it stops.
 */
public class CMSListener
  implements ServletContextListener
{
  /** Delay in milliseconds before the first index rebuild. */
  public static final long DELAY = 2000L;

  /** Interval in milliseconds between rebuilds: 24 hours (86400000 ms). */
  private static final long PERIOD = 24L * 60L * 60L * 1000L;

  private Timer timer;

  /** Cancels the timer created in {@link #contextInitialized}. */
  public void contextDestroyed(ServletContextEvent arg0)
  {
    timer.cancel();
  }

  /** Creates a daemon timer and schedules the index rebuild on it. */
  public void contextInitialized(ServletContextEvent arg0)
  {
    timer = new Timer("更新", true);
    IndexTask rebuildTask = new IndexTask();
    timer.schedule(rebuildTask, DELAY, PERIOD);
  }
}

Lucene：添加搜索功能

package page.plugin;

import java.io.File;
import java.io.StringReader;
import java.util.ArrayList;
import javabean.SearchResult;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import page.inc.HtmlPage;
import util.Constant;
import util.InitServlet;

public class SearchPage extends HtmlPage
{
  public String print(HttpServletRequest req, HttpServletResponse resp)
    throws Exception
  {
    File indexDir = new File(Constant.SEARCH_INDEX_PATH);
    String querrys = getStringParameter("search_txt", "", req);
    int pageNo = getIntParameter("page_no", 1, req);
    if (!(querrys.equals("")))
    {
      IndexReader reader = IndexReader.open(FSDirectory.open(indexDir));
      IndexSearcher searcher = new IndexSearcher(reader);
      if (searcher != null) {
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        String[] fields = { "title", "note", "content" };
        String[] key = { querrys, querrys, querrys };
        Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, key, fields, analyzer);
        TopDocs topDocs = searcher.search(query, 153866);
        ScoreDoc[] hits = topDocs.scoreDocs;
        if (hits.length > 0) {
          int sumPage = getTotalPage(hits.length, InitServlet.MESSAGE_PAGE_SIZE);

          SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b style=\"color:rgb(204,102,0);\">", "</b>");
          Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
          ArrayList results = new ArrayList();
          SearchResult s = null;
          String hightContent = null;
          Document doc = null;
          for (int i = (pageNo - 1) * InitServlet.MESSAGE_PAGE_SIZE; (i < pageNo * InitServlet.MESSAGE_PAGE_SIZE) && (i < hits.length); ++i) {
            s = new SearchResult();
            doc = searcher.doc(hits[i].doc);

            String content = doc.get("content");
            if ((content == null) || (content.equals(""))) {
              s.setContent("");
              s.setCnt(0);
            } else {
              s.setCnt(content.length());
              tokenStream = analyzer.tokenStream("content", new StringReader(content));
              hightContent = highlighter.getBestFragment(tokenStream, content);
              if (hightContent == null)
                s.setContent(content);
              else {
                s.setContent(hightContent);
              }
            }
            s.setNote(doc.get("note"));
            TokenStream tokenStream = analyzer.tokenStream("title", new StringReader(doc.get("title")));
            hightContent = highlighter.getBestFragment(tokenStream, doc.get("title"));
            if (hightContent == null)
              s.setTitle(doc.get("title"));
            else {
              s.setTitle(hightContent);
            }
            s.setUrl(doc.get("url"));
            s.setYear(doc.get("year"));
            s.setMonth(doc.get("month"));
            s.setDay(doc.get("day"));
            s.setAuthor(doc.get("author"));
            results.add(s);
          }

          req.setAttribute("results", (SearchResult[])results.toArray(new SearchResult[results.size()]));
          req.setAttribute("cnt", Integer.valueOf(hits.length));
          req.setAttribute("sumPage", Integer.valueOf(sumPage));
          req.setAttribute("pageSize", Integer.valueOf(InitServlet.MESSAGE_PAGE_SIZE));
        }
      }
    }
    req.setAttribute("querrys", querrys);
    req.setAttribute("pageNo", Integer.valueOf(pageNo));
    return "/jsp/plugin/search/search.jsp";
  }
}

package javabean;

/**
 * Plain bean holding one search hit as handed to the result JSP.
 * All properties are simple read/write values with no validation.
 */
public class SearchResult
{
  private String id;
  private String title;
  private String note;
  private String content;
  private String url;
  private String author;

  // Date parts of the article, kept as strings
  private String year;
  private String month;
  private String day;

  private int cnt;

  /** @return the id */
  public String getId() {
    return id;
  }

  /** @param id the id to store */
  public void setId(String id) {
    this.id = id;
  }

  /** @return the title */
  public String getTitle() {
    return title;
  }

  /** @param title the title to store */
  public void setTitle(String title) {
    this.title = title;
  }

  /** @return the note */
  public String getNote() {
    return note;
  }

  /** @param note the note to store */
  public void setNote(String note) {
    this.note = note;
  }

  /** @return the content */
  public String getContent() {
    return content;
  }

  /** @param content the content to store */
  public void setContent(String content) {
    this.content = content;
  }

  /** @return the url */
  public String getUrl() {
    return url;
  }

  /** @param url the url to store */
  public void setUrl(String url) {
    this.url = url;
  }

  /** @return the year */
  public String getYear() {
    return year;
  }

  /** @param year the year to store */
  public void setYear(String year) {
    this.year = year;
  }

  /** @return the month */
  public String getMonth() {
    return month;
  }

  /** @param month the month to store */
  public void setMonth(String month) {
    this.month = month;
  }

  /** @return the day */
  public String getDay() {
    return day;
  }

  /** @param day the day to store */
  public void setDay(String day) {
    this.day = day;
  }

  /** @return the cnt value */
  public int getCnt() {
    return cnt;
  }

  /** @param cnt the cnt value to store */
  public void setCnt(int cnt) {
    this.cnt = cnt;
  }

  /** @return the author */
  public String getAuthor() {
    return author;
  }

  /** @param author the author to store */
  public void setAuthor(String author) {
    this.author = author;
  }
}

/**
 * Adds or replaces the index entry of a single article.
 *
 * <p>Any document whose {@code id} term matches the article id is deleted and
 * the freshly built document is added in its place. The index is created when
 * it does not exist yet.
 *
 * @param article the article to (re)index
 * @throws Exception if the index cannot be opened or written
 */
protected void indexF(Article article) throws Exception {
    File indexDir = new File(Constant.SEARCH_INDEX_PATH);
    if (!indexDir.exists()) {
      // mkdirs also creates missing parent directories
      indexDir.mkdirs();
    }

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36));
    // APPEND fails when no index exists, which is exactly the case right after
    // the directory has been created above; CREATE_OR_APPEND covers both cases.
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(indexDir), conf);

    boolean committed = false;
    try {
      indexWriter.updateDocument(new Term("id", String.valueOf(article.getId())),
        PubFun.getLuceneDoc(article));
      // NOTE(review): merging down to one segment on every single update is costly; kept to preserve behavior.
      indexWriter.forceMerge(1);
      indexWriter.close();
      committed = true;
    } finally {
      if (!committed) {
        // Drop the uncommitted change and release the write lock.
        try {
          indexWriter.rollback();
        } catch (Exception rollbackFailure) {
          // Report it, but let the original exception propagate.
          rollbackFailure.printStackTrace();
        }
      }
    }
  }

ld191474639

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
lucene实战

package timetask;import java.io.File;import java.io.PrintStream;import java.util.Date;import java.util.TimerTask;import javabean.Article;import org.apache.lucene.analysis.standard.StandardAnaly
复制链接

扫一扫