package org.apache.lucene.demo;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.util.Date;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import thtf.ebuilder.website.search.DBIndex;
/**
 * Simple command-line based search demo.
 *
 * <p>Runs one hard-coded query against the index located via {@code DBIndex}, prints how many
 * hits a sorted search returns and how long it took, then prints the first page of hits with
 * the matching terms highlighted in the title.
 */
public class SearchFiles {

  /** Field whose stored value is highlighted and printed for each hit. */
  private static final String TITLE_FIELD = "info_title";

  /** Field whose stored value is printed as the hit id; also used as the sort key. */
  private static final String ID_FIELD = "info_id";

  private SearchFiles() {}

  /**
   * Entry point of the demo.
   *
   * @param args ignored; the query term, field and page size are hard-coded
   * @throws Exception if the index cannot be opened or searched, or the query cannot be parsed
   */
  public static void main(String[] args) throws Exception {
    String field = "INFO_CONTENT";
    String word = "舞蹈";
    int hitsPerPage = 10;

    // Both the directory and the reader are released in finally blocks so that a failing
    // search or highlight step does not leak file handles. (try/finally rather than
    // try-with-resources keeps the code buildable on the Java level Lucene 4.7 targets.)
    FSDirectory directory = FSDirectory.open(DBIndex._$.getIndexFile());
    try {
      IndexReader reader = DirectoryReader.open(directory);
      try {
        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser parser = new QueryParser(Version.LUCENE_47, field, DBIndex._$.analyzer);
        Query query = parser.parse(word);
        System.out.println("Searching for: " + query.toString(field));

        // Sort: by info_id interpreted as an int, in reverse (descending) order.
        Sort sort =
            new Sort(new SortField[] {new SortField(ID_FIELD, SortField.Type.INT, true)});

        // Filter: wraps the same query as its only SHOULD clause, so it does not narrow the
        // result set; kept to demonstrate the filtered search overload.
        // NOTE(review): confirm that QueryFilter resolves against the Lucene version on the
        // classpath.
        BooleanQuery bqf = new BooleanQuery();
        bqf.add(query, BooleanClause.Occur.SHOULD);

        long startMillis = System.currentTimeMillis();
        TopDocs tDocs = searcher.search(query, new QueryFilter(bqf), 100, sort);
        // Capture the elapsed time before printing so console I/O is not included in it.
        long elapsedMillis = System.currentTimeMillis() - startMillis;
        System.out.println("查询到:" + tDocs.scoreDocs.length);
        System.out.println("Time: " + elapsedMillis + "ms");

        doPagingSearch(word, searcher, query, hitsPerPage);
      } finally {
        reader.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Runs {@code query}, collecting up to {@code 5 * hitsPerPage} hits, and prints the first
   * page of them: the total hit count, the printed index range, and for each hit that has a
   * title its id followed by the title with matching terms wrapped in red bold HTML tags.
   *
   * <p>Only the first page is printed; there is no interactive paging. Hits without a stored
   * title are skipped. When the highlighter finds nothing to highlight, or fails, the plain
   * title is printed instead.
   *
   * @param word the raw search term; retained for caller compatibility, not used for analysis
   * @param searcher searcher used to execute the query and load stored documents
   * @param query the parsed query to run and to highlight against
   * @param hitsPerPage number of hits printed for the first page
   * @throws IOException if searching, loading a document, or analysing a title fails
   */
  public static void doPagingSearch(String word, IndexSearcher searcher, Query query,
      int hitsPerPage) throws IOException {
    // Collect enough docs to show 5 pages.
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(hits.length, start + hitsPerPage);
    System.out.println(start + "-" + end);

    // The highlighter depends only on the query, so build it once instead of per hit.
    SimpleHTMLFormatter formatter =
        new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
    Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
    highlighter.setTextFragmenter(new SimpleFragmenter(400));

    for (int i = start; i < end; i++) {
      Document doc = searcher.doc(hits[i].doc);
      String title = doc.get(TITLE_FIELD);
      if (title == null) {
        continue;
      }

      String display = title;
      try {
        // tokenStream expects the name of the field being analysed, not the search term.
        TokenStream tokenStream =
            DBIndex._$.analyzer.tokenStream(TITLE_FIELD, new StringReader(title));
        String fragment = highlighter.getBestFragment(tokenStream, title);
        // getBestFragment returns null when no fragment matches; keep the plain title then.
        if (fragment != null) {
          display = fragment;
        }
      } catch (InvalidTokenOffsetsException e) {
        // Best effort: highlighting is cosmetic, so report and fall back to the plain title.
        System.err.println(
            "Could not highlight title of document " + doc.get(ID_FIELD) + ": " + e);
      }
      System.out.println(doc.get(ID_FIELD) + "\t" + display);
    }
  }
}