一、Lucene的分页搜索
Lucene的分页不像数据库那样使用limit,而是提供了一种“再查询”的方式。什么是“再查询”呢?就是每次翻页都重新执行一次查询:第一步先把排在最前面的 pageIndex × pageSize 条结果取出来,第二步再根据需求,从中截取第几条到第几条作为当前页。分两步进行查询,所以叫“再查询”。
二、测试代码
package com.wsy;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
/**
 * Demonstrates building a Lucene index from a folder of files and paging
 * through search results with the "re-query" approach: every page request
 * re-runs the query for the top {@code pageIndex * pageSize} hits and then
 * slices out the requested page.
 */
public class FileIndexUtils {
    /** Index location shared by indexing and searching; stays null if it could not be opened. */
    private static Directory directory;
    /** Reader used by {@link #searchPage}; assigned when an instance is constructed. */
    private static IndexReader indexReader;

    static {
        try {
            directory = FSDirectory.open(new File("E:\\Lucene\\IndexLibrary"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens an {@link IndexReader} on the shared index directory.
     * If the directory could not be opened, or the reader cannot be created,
     * the reader stays unset and {@link #searchPage} reports that instead of failing.
     */
    public FileIndexUtils() {
        if (directory == null) {
            // The static initializer already reported why the directory is unavailable.
            return;
        }
        try {
            indexReader = IndexReader.open(directory);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Indexes every regular file directly inside {@code E:\Lucene\SearchSource}.
     * Each document gets the file content (unstored reader field), file name,
     * absolute path, last-modified time and size in KB.
     *
     * @param update when true, all existing documents are deleted before indexing
     */
    public static void index(boolean update) {
        if (directory == null) {
            System.err.println("Index directory is not available; nothing was indexed.");
            return;
        }
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            if (update) {
                indexWriter.deleteAll();
            }
            File[] files = new File("E:\\Lucene\\SearchSource").listFiles();
            if (files == null) {
                // listFiles() returns null when the path is missing, not a directory, or unreadable.
                System.err.println("Source folder cannot be listed; nothing was indexed.");
                return;
            }
            for (File file : files) {
                if (!file.isFile()) {
                    // A FileReader cannot be opened on a sub-directory; skip it
                    // rather than abort the whole indexing run.
                    continue;
                }
                Document document = new Document();
                document.add(new Field("content", new FileReader(file)));
                document.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new Field("path", file.getAbsolutePath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
                document.add(new NumericField("date", Field.Store.YES, true).setLongValue(file.lastModified()));
                document.add(new NumericField("size", Field.Store.YES, true).setIntValue((int) (file.length() / 1024)));
                indexWriter.addDocument(document);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Runs {@code queryString} against the "content" field and prints the
     * requested page, a separator line, and then every hit that was fetched
     * (the top {@code pageIndex * pageSize}) for comparison.
     *
     * @param queryString query in Lucene query-parser syntax
     * @param pageIndex   1-based page number
     * @param pageSize    number of hits per page
     * @throws IllegalArgumentException if {@code pageIndex} or {@code pageSize}
     *                                  is less than 1, or their product overflows an int
     */
    public void searchPage(String queryString, int pageIndex, int pageSize) {
        if (pageIndex < 1 || pageSize < 1) {
            throw new IllegalArgumentException(
                    "pageIndex and pageSize must be >= 1, got " + pageIndex + " and " + pageSize);
        }
        long requested = (long) pageIndex * pageSize;
        if (requested > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("pageIndex * pageSize is too large: " + requested);
        }
        if (indexReader == null) {
            System.err.println("Index reader is not available; cannot search.");
            return;
        }

        IndexSearcher indexSearcher = null;
        try {
            indexSearcher = new IndexSearcher(indexReader);
            QueryParser queryParser = new QueryParser(Version.LUCENE_35, "content",
                    new StandardAnalyzer(Version.LUCENE_35));
            Query query = queryParser.parse(queryString);
            TopDocs topDocs = indexSearcher.search(query, (int) requested);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            // Paged output: clamp the upper bound, because the query may match
            // fewer documents than pageIndex * pageSize (last or out-of-range page).
            int from = (pageIndex - 1) * pageSize;
            int to = Math.min((int) requested, scoreDocs.length);
            for (int i = from; i < to; i++) {
                Document document = indexSearcher.doc(scoreDocs[i].doc);
                System.out.println(scoreDocs[i].doc + ":" + document.get("path") + " " + document.get("fileName"));
            }
            System.out.println("-------------------------------------------------");
            // Unpaged output: every hit that was fetched, for comparison.
            for (int i = 0; i < scoreDocs.length; i++) {
                Document document = indexSearcher.doc(scoreDocs[i].doc);
                System.out.println(scoreDocs[i].doc + ":" + document.get("path") + " " + document.get("fileName"));
            }
        } catch (ParseException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close the searcher even when parsing or searching failed.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Rebuilds the index, then prints page 2 (3 hits per page) for the query "java". */
    public static void main(String[] args) {
        FileIndexUtils.index(true);
        FileIndexUtils fileIndexUtils = new FileIndexUtils();
        fileIndexUtils.searchPage("java", 2, 3);
    }
}
在searchPage()里面,我们先输出当前页的结果,再输出本次查询取回的前 pageIndex × pageSize 条结果(即不分页的结果)用于对比,可以看出分页结果正确。