import java.io.IOException;
import java.text.SimpleDateFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
/**
 * Demonstrates running a query through a custom {@link MyIDFilter}.
 */
public class CustomFilter {
    /**
     * Searches the {@code content} field for the term {@code java}, passing the
     * query through a {@link MyIDFilter} built on the {@code filename} field, and
     * prints up to 1000 hits to standard output.
     *
     * <p>The accessor's {@code set()} returns {@code false}, which selects the
     * filter's exclusion mode for the listed file names.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and not
     * rethrown. The index reader is closed whether or not the search succeeds.
     */
    public void searchByCustomFilter() {
        try {
            DirectoryReader reader = DirectoryReader.open(FileIndexUtils.getDirectory());
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                Query q = new TermQuery(new Term("content", "java"));

                // Custom filter: MyIDFilter driven by an inline FilterAccessor.
                TopDocs tds = searcher.search(q, new MyIDFilter(new FilterAccessor() {
                    @Override
                    public String[] values() {
                        return new String[]{"impala", "catalogd", "impalad", "statestored"};
                    }

                    @Override
                    public boolean set() {
                        return false;
                    }

                    @Override
                    public String getField() {
                        return "filename";
                    }
                }), 1000);

                for (ScoreDoc sd : tds.scoreDocs) {
                    Document d = searcher.doc(sd.doc);
                    System.out.println(sd.doc + ":(" + sd.score + ")" +
                            "[" + d.get("filename") + "[" + d.get("path") + "]--->" +
                            d.get("size") + "------------>" + d.get("id"));
                }
            } finally {
                // Release the reader even when searching or loading a document fails.
                reader.close();
            }
        } catch (IOException e) {
            // CorruptIndexException is an IOException, so this single handler covers both.
            e.printStackTrace();
        }
    }
}
package org.adv.lucene.test;
import org.adv.lucene.util.CustomFilter;
import org.junit.Test;
/**
 * JUnit smoke test for {@link CustomFilter}.
 */
public class TestCustomFilter {
    /**
     * Runs the filtered search once. There are no assertions; the search method
     * prints its hits to standard output.
     */
    @Test
    public void test01() {
        new CustomFilter().searchByCustomFilter();
    }
}
package org.adv.lucene.util;
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
/**
 * Custom filter that includes or excludes documents by term value.
 *
 * <p>The {@link FilterAccessor} supplies the field name, the term values to look
 * up, and the mode. When {@code set()} is {@code true}, only documents containing
 * one of the values pass; otherwise every document passes except those
 * containing one of the values.
 */
public class MyIDFilter extends Filter {
    private final FilterAccessor accessor;

    /**
     * @param accessor source of the field name, term values and filter mode
     */
    public MyIDFilter(FilterAccessor accessor) {
        this.accessor = accessor;
    }

    /**
     * Builds the per-segment bit set of documents that pass this filter.
     *
     * @param context    the segment being searched
     * @param acceptDocs documents allowed to match, or {@code null} if all are allowed
     * @return a bit set with one bit per passing document in the segment
     * @throws IOException if reading the postings fails
     */
    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        AtomicReader reader = context.reader();
        int maxDoc = reader.maxDoc();
        // A fresh bit set has every bit cleared.
        OpenBitSet obs = new OpenBitSet(maxDoc);

        boolean include = accessor.set();
        if (!include) {
            // Exclusion mode: start with everything accepted, then knock out matches.
            obs.set(0, maxDoc);
        }
        markMatchingDocs(reader, obs, include);

        // Honour acceptDocs: a filter must never return a document the caller
        // has ruled out (for example a deleted document).
        if (acceptDocs != null) {
            for (int doc = 0; doc < maxDoc; doc++) {
                if (!acceptDocs.get(doc)) {
                    obs.clear(doc);
                }
            }
        }
        return obs;
    }

    /**
     * Sets (include mode) or clears (exclude mode) the bit of every document
     * that contains one of the accessor's values in the accessor's field.
     */
    private void markMatchingDocs(AtomicReader reader, OpenBitSet obs, boolean include)
            throws IOException {
        String field = accessor.getField();
        for (String value : accessor.values()) {
            DocsEnum de = reader.termDocsEnum(new Term(field, value));
            if (de == null) {
                // The term does not occur in this segment.
                continue;
            }
            // Walk every posting, not just the first; the end is signalled by
            // NO_MORE_DOCS rather than -1.
            int doc;
            while ((doc = de.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
                if (include) {
                    obs.set(doc);
                } else {
                    obs.clear(doc);
                }
            }
        }
    }
}
package org.adv.lucene.util;
/**
 * Data interface that configures a filter: which field to inspect, which
 * values to look for, and how matches are treated.
 */
public interface FilterAccessor {
    /**
     * @return the term values the filter looks up in the field
     */
    String[] values();

    /**
     * @return the name of the indexed field the values belong to
     */
    String getField();

    /**
     * @return {@code true} if matching documents are the ones to keep,
     *         {@code false} if they are the ones to drop
     */
    boolean set();
}