现在开发搜索系统,使用的是开源界比较推崇的Lucene,版本是Maven库上面的3.5,这个版本里面有很多方法都已被标记为不建议使用(deprecated),所以代码的写法有一些变化。我使用的分词器是IKAnalyzer。Lucene全文检索的功能是很强大的,我们在做电子商务系统的时候肯定会遇到排序的问题,比如按销量、按价格排序等等,为了方便客户,我们便需要Lucene的排序功能。其实Lucene中的排序很简单也很方便,我们在执行搜索(调用IndexSearcher的search方法)的时候传入一个Sort就行了,Sort的构造函数需要SortField,具体大家可以去参考Lucene的源代码,我下面写了一个简单的例子来演示怎么使用。代码如下:
- import java.io.IOException;
- import java.util.Locale;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.document.Field.Index;
- import org.apache.lucene.document.Field.Store;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.search.FieldComparator;
- import org.apache.lucene.search.FieldComparatorSource;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.Sort;
- import org.apache.lucene.search.SortField;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.store.RAMDirectory;
- import org.apache.lucene.util.Version;
- import org.junit.Test;
- import org.wltea.analyzer.lucene.IKAnalyzer;
- import org.wltea.analyzer.lucene.IKQueryParser;
- import org.wltea.analyzer.lucene.IKSimilarity;
- public class LuceneSortTest {
- /**
- * Lucene排序查询
- */
- @Test
- public void testSort() throws Exception{
- String name = "name";
- String field = "price";
- String names[] = {"衣服","睡衣","漂亮衣服","好看衣服","男士衣服","女士衣服"};
- double price[] = {12.0,13.0,8.8,9.6,7.5,12.1};
- RAMDirectory dir = new RAMDirectory();
- Analyzer analyzer = new IKAnalyzer();
- IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_35, analyzer);
- IndexWriter writer = new IndexWriter(dir, cfg);
- for (int i=0;i<names.length;i++) {
- Document doc = new Document();
- doc.add(new Field(name, names[i], Store.YES, Index.ANALYZED));
- doc.add(new Field(field, String.valueOf(price[i]), Store.YES, Index.NOT_ANALYZED));
- writer.addDocument(doc);
- }
- writer.commit();
- writer.close();
- IndexReader reader = IndexReader.open(dir);
- Query query = IKQueryParser.parse(name, "衣服");
- Sort sort = new Sort(new SortField(field,SortField.DOUBLE,true)); //排序 false 升序 true降序
- IndexSearcher isearcher = new IndexSearcher(reader);
- isearcher.setSimilarity(new IKSimilarity());
- TopDocs topDocs = isearcher.search(query, 5, sort);
- ScoreDoc scoreDocs[] = topDocs.scoreDocs;
- for (ScoreDoc scoreDoc : scoreDocs) {
- Document doc = isearcher.doc(scoreDoc.doc);
- System.out.println(doc.get(field));
- }
- isearcher.close();
- dir.close();
- }
- }
import java.io.IOException;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;
/**
 * Demonstrates sorted searching with Lucene 3.5 and the IK analyzer.
 */
public class LuceneSortTest {
    /**
     * Lucene sorted query.
     *
     * <p>Indexes six products (name and price) into an in-memory directory,
     * searches the name field, and prints the stored price of at most five
     * hits ordered by price, highest first.
     *
     * @throws Exception if indexing or searching fails
     */
    @Test
    public void testSort() throws Exception {
        String name = "name";
        String field = "price";
        String[] names = {"衣服", "睡衣", "漂亮衣服", "好看衣服", "男士衣服", "女士衣服"};
        double[] price = {12.0, 13.0, 8.8, 9.6, 7.5, 12.1};

        RAMDirectory dir = new RAMDirectory();
        try {
            Analyzer analyzer = new IKAnalyzer();
            IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_35, analyzer);
            IndexWriter writer = new IndexWriter(dir, cfg);
            try {
                for (int i = 0; i < names.length; i++) {
                    Document doc = new Document();
                    doc.add(new Field(name, names[i], Store.YES, Index.ANALYZED));
                    // The price is indexed as a single un-analyzed term so it can be
                    // parsed back into a double when sorting.
                    doc.add(new Field(field, String.valueOf(price[i]), Store.YES, Index.NOT_ANALYZED));
                    writer.addDocument(doc);
                }
                writer.commit();
            } finally {
                // Always release the writer, even if adding a document failed.
                writer.close();
            }

            IndexReader reader = IndexReader.open(dir);
            try {
                Query query = IKQueryParser.parse(name, "衣服");
                // Third argument is "reverse": false = ascending, true = descending.
                Sort sort = new Sort(new SortField(field, SortField.DOUBLE, true));
                IndexSearcher isearcher = new IndexSearcher(reader);
                try {
                    isearcher.setSimilarity(new IKSimilarity());
                    TopDocs topDocs = isearcher.search(query, 5, sort);
                    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                        Document doc = isearcher.doc(scoreDoc.doc);
                        System.out.println(doc.get(field));
                    }
                } finally {
                    isearcher.close();
                }
            } finally {
                // IndexSearcher.close() does not close a reader that was handed to
                // its constructor, so the reader has to be closed explicitly.
                reader.close();
            }
        } finally {
            dir.close();
        }
    }
}
这段代码执行的结果为:
- 12.1
- 12.0
- 9.6
- 8.8
- 7.5
12.1
12.0
9.6
8.8
7.5
在SortField里面有很多静态常量来提供给排序器作为排序的依据,Lucene里面自己定义好的排序实现已经基本能够满足我们的需求了,如果自己想定义一个自己的排序算法,那么可以继承FieldComparatorSource实现里面的方法:
- public FieldComparator newComparator(String fieldname, int numHits,
- int sortPos, boolean reversed) throws IOException {
- // TODO Auto-generated method stub
- return new DoubleFieldComparatorSource.DoubleFieldComparator();
- }
/**
 * Supplies the comparator used for custom sorting.
 *
 * <p>None of the arguments are consulted; every call hands back a freshly
 * constructed {@code DoubleFieldComparator}.
 *
 * @param fieldname name of the field being sorted on (unused here)
 * @param numHits   number of hits requested (unused here)
 * @param sortPos   position of this sort criterion (unused here)
 * @param reversed  whether the sort order is reversed (unused here)
 * @return a new {@code DoubleFieldComparator}
 * @throws IOException declared by the signature; not thrown by this body
 */
public FieldComparator newComparator(String fieldname, int numHits,
        int sortPos, boolean reversed) throws IOException {
    FieldComparator comparator = new DoubleFieldComparatorSource.DoubleFieldComparator();
    return comparator;
}
里面的FieldComparator是一个抽象类,主要就是用来作比较使用的,定义自己的排序算法关键也在于继承这个抽象类,然后实现里面的方法,需要实现的方法如下:
- class DoubleFieldComparator extends FieldComparator{
- @Override
- public int compare(int slot1, int slot2) {
- return 0;
- }
- @Override
- public void setBottom(int slot) {
- }
- @Override
- public int compareBottom(int doc) throws IOException {
- return 0;
- }
- @Override
- public void copy(int slot, int doc) throws IOException {
- }
- @Override
- public void setNextReader(IndexReader reader, int docBase)
- throws IOException {
- }
- @Override
- public Object value(int slot) {
- return null;
- }
- }
class DoubleFieldComparator extends FieldComparator{
@Override
public int compare(int slot1, int slot2) {
return 0;
}
@Override
public void setBottom(int slot) {
}
@Override
public int compareBottom(int doc) throws IOException {
return 0;
}
@Override
public void copy(int slot, int doc) throws IOException {ub
}
@Override
public void setNextReader(IndexReader reader, int docBase)
throws IOException {
}
@Override
public Object value(int slot) {
return null;
}
}
这里顺便列出SortField已经提供的排序类型:
- SortField.SCORE 按相关度评分排序
- SortField.DOC 按文档编号(索引顺序)排序
- SortField.AUTO 域的值为int、long、float都有效(注意:该常量在Lucene 2.9中已废弃,并在3.0中移除,3.5中不可用)
- SortField.STRING 域按STRING排序
- SortField.FLOAT
- SortField.LONG
- SortField.DOUBLE
- SortField.SHORT
- SortField.CUSTOM 通过比较器排序
- SortField.BYTE