Lucene主要的搜索API:
类 | 目的 |
---|---|
IndexSearcher | 搜索引擎的门户。所有搜索都通过IndexSearcher进行,它们会调用该类中重载的search方法 |
Query(及其子类) | 封装某种查询类型的具体子类。Query实例将被传递给IndexSearcher的search方法 |
QueryParser | 将用户输入的(可读的)查询表达式处理成具体的Query对象 |
TopDocs | 保存由IndexSearcher.search()方法返回的评分最高的前若干个文档 |
ScoreDoc | 提供对TopDocs中每条搜索结果的访问接口 |
实现简单的搜索功能
QueryParser处理的表达式范例
查询表达式 | 匹配文档 |
---|---|
java | 默认域包含java项的文档 |
java junit/java OR junit | 默认域包含java和junit中一个或两个的文档 |
+java +junit/java AND junit | 默认域同时包含java和junit的文档 |
title:ant | title域中包含ant项的文档 |
title:extreme -subject:sports/title:extreme AND NOT subject:sports | title域中包含extreme且subject域中不包含sports的文档 |
(agile OR extreme) AND methodology | 默认域中包含methodology且包含agile和extreme中的一个或两个的文档 |
title:"junit in action" | title域中包含短语junit in action的文档 |
title:"junit action"~5 | title域中junit和action之间距离不超过5个位置的文档 |
java* | 包含由java开头的项的文档,例如javaspaces,javaserver,java.net和java本身 |
java~ | 包含与单词java相近的项的文档,如lava |
lastmodified:[1/1/09 TO 12/31/09] | lastmodified域值在2009年1月1号和2009年12月31号之间的文档 |
代码
- testTerm():从subject field里面查“ant”和“junit”
- testQueryParser():通过QueryParser生成query
BasicSearchingTest.java
import junit.framework.TestCase;
import lia.common.TestUtil;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
// From chapter 3
/**
 * Basic search examples run against the book index supplied by
 * {@link TestUtil#getBookIndexDirectory()}.
 *
 * <p>Each test opens its own {@link Directory} and {@link IndexSearcher} and
 * releases both in {@code finally} blocks, so a failing assertion or an
 * exception thrown while searching does not leak the open index.
 */
public class BasicSearchingTest extends TestCase {

  /**
   * Searches the "subject" field with a {@link TermQuery} for the terms
   * "ant" and "junit" and checks the number of hits for each.
   *
   * @throws Exception if the index cannot be opened, searched or closed
   */
  public void testTerm() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();    // A: obtain directory from TestUtil
    try {
      IndexSearcher searcher = new IndexSearcher(dir);   // B: create IndexSearcher
      try {
        Term t = new Term("subject", "ant");
        Query query = new TermQuery(t);
        TopDocs docs = searcher.search(query, 10);
        // C: confirm one hit for "ant"; the message names the expected title.
        assertEquals("Ant in Action",
                     1, docs.totalHits);

        t = new Term("subject", "junit");
        docs = searcher.search(new TermQuery(t), 10);
        // D: confirm two hits for "junit"; the message names the expected titles.
        assertEquals("Ant in Action, " +
                     "JUnit in Action, Second Edition",
                     2, docs.totalHits);
      } finally {
        // Close the searcher even when an assertion above fails.
        searcher.close();
      }
    } finally {
      // Close the directory even if creating or closing the searcher throws.
      dir.close();
    }
  }

  /**
   * Builds queries from text expressions with a {@link QueryParser} (default
   * field "contents", {@link SimpleAnalyzer}) and checks the hits returned.
   *
   * @throws Exception if the index cannot be opened, an expression cannot be
   *         parsed, or the search fails
   */
  public void testQueryParser() throws Exception {
    Directory dir = TestUtil.getBookIndexDirectory();
    try {
      IndexSearcher searcher = new IndexSearcher(dir);
      try {
        // A: create the QueryParser.
        QueryParser parser = new QueryParser(Version.LUCENE_30,
                                             "contents",
                                             new SimpleAnalyzer());

        // B: parse the user's text. The terms are upper-case here; they are
        // expected to match because SimpleAnalyzer lower-cases tokens.
        Query query = parser.parse("+JUNIT +ANT -MOCK");
        TopDocs docs = searcher.search(query, 10);
        assertEquals(1, docs.totalHits);
        Document d = searcher.doc(docs.scoreDocs[0].doc);
        assertEquals("Ant in Action", d.get("title"));

        // B: parse a second expression combining two terms with OR.
        query = parser.parse("mock OR junit");
        docs = searcher.search(query, 10);
        assertEquals("Ant in Action, " +
                     "JUnit in Action, Second Edition",
                     2, docs.totalHits);
      } finally {
        // Close the searcher even when parsing, searching or an assertion fails.
        searcher.close();
      }
    } finally {
      // Close the directory even if creating or closing the searcher throws.
      dir.close();
    }
  }
}