public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException;
经过一天的研究,总算有些进展. 希望大家提出各种意见,欢迎拍砖! Lucene 版本:4.3.1
小插曲:原本想写写 spatial search,但是研究着研究着,就了解到了 TermFilter. 因此,见到代码后,不要奇怪啊.有机会的话,再写一些关于 spatial search 的实现.虽然有现成的实现,可是,依然想弄明白,具体是怎么回事. 欢迎大家拍一些有深度的砖.
你可以拍砖,但是你拍得一定要有意义~
核心类:
package com.pptv.search.list.index.increment;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Iterator;
import org.apache.commons.lang.NumberUtils;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
public class MyOwnFilter extends Filter {

    public static void main(String[] args) throws Exception {
        SpatialSearchTest.main(args);
    }

    /**
     * Collects every document that has at least one term in the "able" field,
     * by walking that field's term dictionary and OR-ing each term's postings
     * into a {@link FixedBitSet}.
     *
     * @param context    per-segment (atomic) reader context supplied by the searcher
     * @param acceptDocs live/accepted docs; forwarded to the postings enumeration
     *                   so deleted docs are skipped
     * @return the set of matching doc IDs for this segment (possibly empty)
     * @throws IOException on index access failure
     */
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
            throws IOException {
        System.out.println(">>>> MyOwnFilter in");
        final AtomicReader reader = context.reader();
        // A. Result bitset sized to this segment's doc-ID space.
        final FixedBitSet result = new FixedBitSet(reader.maxDoc());
        // B. All indexed fields of this segment.
        final Fields fields = reader.fields();
        showFields(fields);

        String fieldName = "able";
        Terms terms = fields.terms(fieldName);
        // BUGFIX: the field may be absent from this segment; the original code
        // would NPE on terms.size() in that case. Return the empty set instead.
        if (terms == null) {
            System.out.println("field '" + fieldName + "' has no terms");
            System.out.println("<<<< MyOwnFilter out");
            return result;
        }
        System.out.println(fieldName + "_" + "terms.size() = " + terms.size());

        // C. Enumerate every term of the field.
        // BUGFIX: Terms.size() may return -1 when the codec cannot report a
        // count, which made the original index-based loop skip all terms.
        // The documented idiom is to iterate until next() returns null.
        TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docs = null; // reused across terms; no freqs needed for a filter
        int i = 0;
        while (termsEnum.next() != null) {
            System.out.println("----" + i + "----" + termsEnum.term());
            // BUGFIX: the original decoded bytes from position 0, ignoring
            // BytesRef.offset; utf8ToString() honors offset and length.
            System.out.println("内容:" + termsEnum.term().utf8ToString());
            i++;
            // E. Walk this term's postings and mark every matching document.
            docs = termsEnum.docs(acceptDocs, docs, DocsEnum.FLAG_NONE);
            int docId;
            while ((docId = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                System.out.println("collected:" + docId);
                result.set(docId);
            }
        }
        System.out.println("<<<< MyOwnFilter out");
        return result;
    }

    /** Prints the field count and each indexed field name, for debugging. */
    private void showFields(final Fields fields) {
        System.out.println("fields.size() = " + fields.size());
        Iterator<String> ite = fields.iterator();
        int i = 0;
        while (ite.hasNext()) {
            ++i;
            System.out.println("\t" + i + ":" + ite.next());
        }
    }
}
入口类:
package com.pptv.search.list.index.increment;
import java.io.IOException;
import java.util.BitSet;
import java.util.Set;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.OpenBitSetIterator;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.Version;
@SuppressWarnings("unused")
public class SpatialSearchTest {

    static Version version = Version.LUCENE_43;

    /**
     * Indexes a handful of sample documents into a RAMDirectory, then searches
     * them with a custom Query restricted by {@link MyOwnFilter} and prints
     * each hit's coordinates.
     */
    public static void main(String[] args) throws Exception {
        RAMDirectory d = new RAMDirectory();
        IndexWriter writer = new IndexWriter(d, new IndexWriterConfig(version,
                new StandardAnalyzer(version)));
        doIndex(writer);

        // BUGFIX: the DirectoryReader was never closed in the original;
        // hold a reference so it can be released in finally.
        DirectoryReader reader = DirectoryReader.open(d);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            System.out.println("maxDoc:" + searcher.getIndexReader().maxDoc());
            // Query + Filter under test.
            Query query = new MyQuery();
            query.setBoost(1.0001f);
            System.out.println("query:" + query);
            Filter filter = createFilter();
            System.out.println("filter:" + filter);
            TopDocs tds = searcher.search(query, filter, 10);
            for (ScoreDoc sd : tds.scoreDocs) {
                printDoc(searcher.doc(sd.doc));
            }
        } finally {
            reader.close();
        }
    }

    /** Factory for the filter under test; swap implementations here. */
    private static Filter createFilter() {
        // filter = new MyFilter(new Term("able", "1"));
        return new MyOwnFilter();
    }

    /** Prints one hit's stored coordinates as (lng,lat). Renamed from typo'd pintDoc. */
    private static void printDoc(Document doc) {
        String lat = doc.get("lat");
        String lng = doc.get("lng");
        System.out.println("(" + lng + "," + lat + ")");
    }

    /**
     * Adds 5 sample documents (the "able" field alternates between "0" and a
     * non-ASCII token), force-merges to a single segment, and closes the writer.
     */
    private static void doIndex(IndexWriter writer) throws Exception {
        // BUGFIX: the original condition "i < 100 && i < 5" was a redundant
        // conjunction; only the i < 5 bound ever mattered.
        for (int i = 0; i < 5; i++) {
            Document document = new Document();
            indexLocation(document, 100L + i, (Math.random() * 100L) + i * i,
                    i % 2 == 0 ? "0" : "abcd你好");
            writer.addDocument(document);
        }
        writer.forceMerge(1);
        writer.close();
    }

    /**
     * Populates one document with stored lat/lng double fields plus a stored,
     * non-tokenized "able" string field.
     */
    private static void indexLocation(Document document, double longitude,
            double latitude, String able) throws Exception {
        document.add(new StringField("able", able, Store.YES));
        document.add(new DoubleField("lat", latitude, Store.YES));
        document.add(new DoubleField("lng", longitude, Store.YES));
    }
}
实际上就是通过Filter暴露给我们的下面这个方法
public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException;
通过context得到reader,再得到fields,再得到terms,最后通过
public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException;
方法,将结果合并封装,并返回.注意,这个过程是在搜索过程中,执行的.
下面这句话,希望高手们拍下砖,
目前,我猜想 Lucene 是先 query,后 filter 的?对吗?怎么都感觉不对.(补充:在 Lucene 4.x 中,IndexSearcher 实际上会把 Filter 的 DocIdSetIterator 与查询的 Scorer 交替推进(leapfrog),也就是说 filter 是在搜索打分过程中同步生效的,并不是先查完再整体过滤.)希望高手明示,改天有机会,再来验证这个问题.