一、跨度域查询
基类是:SpanQuery
子类:SpanTermQuery; //本文测试用的是这个子类,当然还有其他子类
二、测试
/**
 * Small demo of Lucene span queries: indexes two sentences in an in-memory
 * directory, runs a {@code SpanTermQuery} for the term "you" on the
 * "contents" field and prints every match with the matched token wrapped in
 * angle brackets.
 *
 * <p>Call order is {@link #init()}, {@link #createWrite()}, {@link #test()}.
 */
public class SpanTest {
    Directory dir;
    IndexWriter writer;
    IndexReader reader;
    IndexSearcher search;
    String[] ceshi;

    /**
     * Creates the in-memory index directory, opens a writer on it and sets
     * the sample sentences to index.
     *
     * @throws IOException if the index writer cannot be opened
     */
    public void init() throws IOException {
        dir = new RAMDirectory();
        writer = writer(dir);
        ceshi = new String[] {"i like you", "are you ok"};
    }

    /**
     * Opens an {@code IndexWriter} on the given directory using a
     * whitespace analyzer.
     *
     * @param dir directory the index is written to
     * @return a new writer for {@code dir}
     * @throws IOException if the writer cannot be opened
     */
    public IndexWriter writer(Directory dir) throws IOException {
        Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_42);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, analyzer);
        return new IndexWriter(dir, config);
    }

    /**
     * Indexes every sample sentence as one document with a stored
     * "contents" field, then closes the writer.
     *
     * @throws IOException if adding a document or closing the writer fails
     */
    public void createWrite() throws IOException {
        try {
            for (String sentence : ceshi) {
                Document doc = new Document();
                doc.add(new TextField("contents", sentence, Store.YES));
                writer.addDocument(doc);
            }
        } finally {
            // Close the writer even when indexing fails so it is not leaked.
            writer.close();
        }
    }

    /**
     * Runs the span query against every segment of the index and prints,
     * for each match, the span's start/end positions and the stored text
     * with the matched tokens bracketed. Prints "no spans" when nothing
     * matched. The reader opened here is closed before returning.
     *
     * @throws IOException if the index cannot be read
     */
    public void test() throws IOException {
        reader = DirectoryReader.open(dir);
        // One analyzer is enough; it is reused for every matching document.
        Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_42);
        try {
            search = new IndexSearcher(reader);
            SpanTermQuery query = new SpanTermQuery(new Term("contents", "you"));
            Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
            termContexts.put(query.getTerm(),
                    TermContext.build(search.getTopReaderContext(), query.getTerm(), false));
            System.out.println(query.getTerm());

            int num = 0;
            // Visit every segment, not just the first one.
            int leafCount = reader.leaves().size();
            for (int leaf = 0; leaf < leafCount; leaf++) {
                // Live docs of this segment (null means nothing is deleted).
                Bits liveDocs = reader.leaves().get(leaf).reader().getLiveDocs();
                // Span doc ids are segment-relative; docBase maps them to
                // ids understood by the top-level reader.
                int docBase = reader.leaves().get(leaf).docBase;
                Spans spans = query.getSpans(reader.leaves().get(leaf), liveDocs, termContexts);
                while (spans.next()) {
                    num++;
                    Document d = reader.document(docBase + spans.doc());
                    String marked = highlight(analyzer, d.get("contents"), spans.start(), spans.end());
                    System.out.println(spans.start() + " " + spans.end());
                    System.out.println(marked);
                }
            }
            if (num == 0) {
                System.out.println("no spans");
            }
        } finally {
            analyzer.close();
            reader.close();
        }
    }

    /**
     * Re-tokenizes {@code text} and returns the tokens joined by spaces
     * (with a trailing space), putting "&lt;" before the token at index
     * {@code start} and "&gt;" after the token at index {@code end - 1}.
     *
     * <p>The index counts emitted tokens from 0. That equals the indexed
     * term position only when the analyzer emits every token with a
     * position increment of 1, as the whitespace analyzer used here does;
     * an analyzer that drops tokens (e.g. stop words) would need the
     * position-increment attribute instead.
     */
    private static String highlight(Analyzer analyzer, String text, int start, int end)
            throws IOException {
        TokenStream ts = analyzer.tokenStream("contents", new StringReader(text));
        try {
            CharTermAttribute termAttribute = ts.addAttribute(CharTermAttribute.class);
            // reset() is mandatory before the first incrementToken() call.
            ts.reset();
            StringBuilder marked = new StringBuilder();
            int k = 0;
            while (ts.incrementToken()) {
                if (k == start) {
                    marked.append("<");
                }
                marked.append(termAttribute.toString());
                if (k + 1 == end) {
                    marked.append(">");
                }
                marked.append(" ");
                k++;
            }
            ts.end();
            return marked.toString();
        } finally {
            // Closing lets the analyzer reuse its components for the next text.
            ts.close();
        }
    }

    /** Builds the sample index and runs the span query demo. */
    public static void main(String[] args) throws IOException {
        SpanTest s = new SpanTest();
        s.init();
        s.createWrite();
        s.test();
    }
}
运行结果:
最后:
// Excerpt of the highlighting loop from test(): rebuilds the text token by
// token, writing "<" before the token whose index equals spans.start() and
// ">" after the token whose index + 1 equals spans.end().
// k is the 0-based count of tokens the analyzer has emitted so far.
int k=0;
while(ts.incrementToken()){
// Opening bracket goes in front of the first token of the span.
if(k==spans.start()){
buffer.append("<");
}
buffer.append(termAttribute.toString());
// spans.end() is compared against k+1, so the bracket closes after the
// token at index spans.end()-1.
if(k+1==spans.end()){
buffer.append(">");
}
// Every token is followed by a space, including the last one.
buffer.append(" ");
k++;
}
但是当换用其他分词器时,上面的匹配写法就会出错。因为其他分词器在分词时可能会根据需求去掉一些词
(例如停用词 StopWords 或其他词),而上面的计数 k 只是从 0 开始逐个累加已输出的词,所以换用其他分词器后,
k 并不是单词的真实位置,无法与 spans.start()/spans.end() 正确对应;此时应通过 PositionIncrementAttribute 累加位置增量来得到真实位置。