lucene 4.7 （2）全文检索之查询

最新推荐文章于 2024-10-10 09:56:47 发布
iteye_20353
最新推荐文章于 2024-10-10 09:56:47 发布
阅读量124
点赞数
文章标签： java
本文链接：https://blog.csdn.net/iteye_20353/article/details/82583147
版权
package org.apache.lucene.demo;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.StringReader;
import java.util.Date;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import thtf.ebuilder.website.search.DBIndex;

/**
 * Simple command-line based search demo.
 *
 * <p>Opens the index located by {@code DBIndex._$.getIndexFile()}, runs a fixed query against the
 * {@code INFO_CONTENT} field, and prints the first page of hits with the matching terms of the
 * {@code info_title} field highlighted.
 */
public class SearchFiles {

  private SearchFiles() {}

  /**
   * Runs the demo: a sorted and filtered search whose hit count and elapsed time are printed,
   * followed by a call to {@link #doPagingSearch} that prints the first page of results.
   *
   * @param args ignored; the field, search text and page size are hard-coded
   * @throws Exception if the index cannot be opened, the query cannot be parsed, or a search fails
   */
  public static void main(String[] args) throws Exception {

    String field = "INFO_CONTENT";
    String word = "舞蹈";
    int hitsPerPage = 10;

    // Both the directory and the reader are released in finally blocks so that a failure while
    // parsing or searching does not leak the underlying index files.
    FSDirectory directory = FSDirectory.open(DBIndex._$.getIndexFile());
    try {
      IndexReader reader = DirectoryReader.open(directory);
      try {
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser(Version.LUCENE_47, field, DBIndex._$.analyzer);

        Query query = parser.parse(word);
        System.out.println("Searching for: " + query.toString(field));

        // Sort: by info_id as an int, with the reverse flag set.
        Sort sort = new Sort(new SortField[] {new SortField("info_id", SortField.Type.INT, true)});

        // Filter: built from the very same query (single SHOULD clause).
        // NOTE(review): QueryFilter does not appear to be part of the Lucene 4.x API
        // (QueryWrapperFilter looks like the replacement) - confirm against the project's jars.
        BooleanQuery bqf = new BooleanQuery();
        bqf.add(query, BooleanClause.Occur.SHOULD);

        // nanoTime is used for the elapsed-time measurement instead of wall-clock Date values.
        long startNanos = System.nanoTime();
        TopDocs tDocs = searcher.search(query, new QueryFilter(bqf), 100, sort);
        System.out.println("查询到：" + tDocs.scoreDocs.length);
        long elapsedMillis = (System.nanoTime() - startNanos) / 1000000L;
        System.out.println("Time: " + elapsedMillis + "ms");

        doPagingSearch(word, searcher, query, hitsPerPage);
      } finally {
        reader.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Executes {@code query}, collecting up to {@code 5 * hitsPerPage} hits, and prints the first
   * page of them.
   *
   * <p>For every hit on the first page that has a stored {@code info_title} value, one line is
   * printed containing the stored {@code info_id}, a tab, and the title with query terms wrapped
   * in highlight markup. When the highlighter finds no query term in the title, or highlighting
   * fails, the unmodified title is printed instead.
   *
   * @param word the raw search text; kept for interface compatibility and not used here
   * @param searcher searcher over the index to query
   * @param query the parsed query to execute and to highlight with
   * @param hitsPerPage number of hits per result page
   * @throws IOException if searching, loading a document, or analyzing a title fails
   */
  public static void doPagingSearch(String word, IndexSearcher searcher, Query query,
                                     int hitsPerPage) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(hits.length, start + hitsPerPage);
    System.out.println(start + "-" + end);

    // The highlighter depends only on the query, so it is built once rather than per hit.
    SimpleHTMLFormatter formatter =
        new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(400));

    for (int i = start; i < end; i++) {
      Document doc = searcher.doc(hits[i].doc);
      String title = doc.get("info_title");
      if (title == null) {
        continue;
      }

      String display = title;
      try {
        // The first argument of tokenStream is the field name, not the search text.
        TokenStream tokenStream =
            DBIndex._$.analyzer.tokenStream("info_title", new StringReader(title));
        String fragment = highlighter.getBestFragment(tokenStream, title);
        // getBestFragment yields null when no query term occurs in the text.
        if (fragment != null) {
          display = fragment;
        }
      } catch (InvalidTokenOffsetsException e) {
        // Best effort: report the problem and fall back to the plain title.
        e.printStackTrace();
      }
      System.out.println(doc.get("info_id") + "\t" + display);
    }
  }
}