所谓PhraseQuery,就是通过短语来检索。
例如现在有一个字符串“the quick brown fox jumped over the lazy dog”,即使我们不知道其中精确的短语,仍然可以用“quick”、“fox”这样的词组成短语来查找文档。slop是指为了让查询中的各个词按给定顺序与文档匹配,所允许的词位置移动的最大次数(即一种“编辑距离”);slop为0(默认值)时要求精确的短语匹配。
下面是模仿《Lucene in Action》第二版写的一个例子。
package com.cn;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
 * Small demonstration of how {@link PhraseQuery} slop affects matching.
 *
 * <p>A single document containing the sentence
 * "the quick brown fox jumped over the lazy dog" is indexed in memory, and
 * several phrase queries are run against it with different slop values. For
 * every query the number of matching documents is printed on its own line.
 */
public class PharserQueryTest {

    /**
     * Builds the one-document in-memory index and runs the sample queries.
     *
     * <p>The queries come in pairs that use the same terms with two
     * consecutive slop values, so the printed hit counts show the effect of
     * raising the slop by one.
     *
     * @param args unused
     * @throws Exception if indexing or searching fails
     */
    public static void main(String[] args) throws Exception {
        RAMDirectory directory = new RAMDirectory();
        IndexWriter indexWriter = new IndexWriter(
                directory,
                new IndexWriterConfig(Version.LUCENE_34, new StandardAnalyzer(Version.LUCENE_34)));
        try {
            Document doc = new Document();
            doc.add(new Field(
                    "field",
                    "the quick brown fox jumped over the lazy dog",
                    Field.Store.YES,
                    Field.Index.ANALYZED));
            indexWriter.addDocument(doc);
        } finally {
            // Always close the writer, even if adding the document fails.
            indexWriter.close();
        }

        // Two terms in document order.
        match(directory, new String[]{"quick", "jumped"}, 1);
        match(directory, new String[]{"quick", "jumped"}, 2);
        // The same two terms in reverse order.
        match(directory, new String[]{"jumped", "quick"}, 3);
        match(directory, new String[]{"jumped", "quick"}, 4);
        // Three terms in document order.
        match(directory, new String[]{"quick", "jumped", "lazy"}, 3);
        match(directory, new String[]{"quick", "jumped", "lazy"}, 4);
        // The same three terms in reverse order.
        match(directory, new String[]{"lazy", "jumped", "quick"}, 7);
        match(directory, new String[]{"lazy", "jumped", "quick"}, 8);
    }

    /**
     * Runs a phrase query against the field named "field" and prints the
     * total number of hits.
     *
     * @param directory index to search
     * @param pharser   terms of the phrase, in query order
     * @param slop      slop value set on the phrase query
     * @throws Exception if the index cannot be opened or searched
     */
    public static void match(Directory directory, String[] pharser, int slop) throws Exception {
        IndexSearcher indexSearcher = new IndexSearcher(directory);
        try {
            PhraseQuery query = new PhraseQuery();
            query.setSlop(slop);
            for (String s : pharser) {
                query.add(new Term("field", s));
            }
            TopDocs topDocs = indexSearcher.search(query, 10);
            System.out.println(topDocs.totalHits);
        } finally {
            // A searcher is opened on every call; close it so the resources
            // it holds are not leaked across calls.
            indexSearcher.close();
        }
    }
}
下面是运行结果(每行依次对应main中的一次match调用,输出的是命中的文档数。0:查不到;1:查到):
0
1
0
1
0
1
0
1