TestCustomFilter.java
package org.itat.lucene.test;
import org.itat.lucene.util.CustomFilter;
import org.junit.Test;
/**
 * JUnit driver for the custom-filter example (project
 * lucene_advance_search03): a custom filter built with a reasonable design.
 *
 * @author 半仙儿
 * @version V1.0 (2015-04-20 18:05:03)
 */
public class TestCustomFilter {

    /**
     * Runs the custom-filter search once. As configured in
     * {@link CustomFilter}, the search is restricted to documents whose
     * filename is RUNNING.aaaaa or server.xml; the hits are printed by the
     * search helper itself, so this test makes no assertions.
     */
    @Test
    public void test01() {
        new CustomFilter().searchByCustomFilter();
    }
}
TestSearch.java
package org.itat.lucene.test;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermRangeFilter;
import org.apache.lucene.search.WildcardQuery;
import org.itat.lucene.util.FileIndexUtil;
import org.itat.lucene.util.SearchTest;
import org.junit.Before;
import org.junit.Test;
public class TestSearch {
private SearchTest st;
@Before
public void init() {
st = new SearchTest();
}
@Test
public void index() {
FileIndexUtil.index(true);
}
@Test
public void test01() {
// 不进行排序
st.searcherBySort("java", Sort.INDEXORDER);
// 以Doc的Id进行排序
// st.searcherBySort("java", Sort.INDEXORDER);
// 通过评分进行排序--设置了排序,就不能看到评分了。
// st.searcherBySort("java", Sort.RELEVANCE);
// 根据文件的大小进行排序
// st.searcherBySort("java", new Sort(new SortField("size",
// SortField.INT)));
// 通过日期进行排序
// st.searcherBySort("java", new Sort(new SortField("date",
// SortField.LONG)));
// 通过文件名进行排序
// st.searcherBySort("java", new Sort(
// new SortField("filename", SortField.STRING)));
// 使用降序进行排序(通过设置SortField的最后的一个参数设置降序排序)
// st.searcherBySort("java", new Sort(new SortField("filename",
// SortField.STRING, true)));
// 根据文件的大小和评分进行排序
st.searcherBySort("java", new Sort(
new SortField("size", SortField.INT), SortField.FIELD_SCORE));
}
@Test
public void test02() {
Filter tr = new TermRangeFilter("filename", "java.hhh", "java.ttt",
true, true);
tr = NumericRangeFilter.newIntRange("size", 500, 4900, true, true);
//通过query进行过滤
tr = new QueryWrapperFilter(new WildcardQuery(new Term("filename",
"*.ff")));
st.searcherByFilter("java", tr);
}
@Test
public void test03() {
Query query = new WildcardQuery(new Term("filename", "c*"));
st.searcherByQuery(query);
}
}<span style="font-family:Arial, Helvetica, sans-serif;"><span style="white-space: normal;">
</span></span>
CustomFilter.java
package org.itat.lucene.util;
import java.text.SimpleDateFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
/**
 * Example search that restricts the hits with the custom {@link MyIDFilter}.
 */
public class CustomFilter {

    /**
     * Searches the "content" field for the term "java", keeps only the
     * documents whose "filename" term is RUNNING.aaaaa or server.xml, and
     * prints up to 200 hits to standard output.
     *
     * <p>Any failure is reported with a stack trace and otherwise ignored.
     * The reader and the searcher are always closed, also when the search
     * fails.
     */
    public void searchByCustomFilter() {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(FileIndexUtil.getDirectory());
            searcher = new IndexSearcher(reader);
            Query q = new TermQuery(new Term("content", "java"));
            TopDocs tds = searcher.search(q, new MyIDFilter(new FilterAccessor() {
                @Override
                public String[] values() {
                    // return new String[] { "10", "20", "30" };
                    return new String[] { "RUNNING.aaaaa", "server.xml" };
                }

                @Override
                public boolean set() {
                    // true: only the listed values pass the filter.
                    return true;
                }

                @Override
                public String getField() {
                    // return "id";
                    return "filename";
                }
            }), 200);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document d = searcher.doc(sd.doc);
                System.out.println(sd.doc + ":(" + sd.score + ")["
                        + d.get("filename") + "[" + d.get("path") + "]-->"
                        + d.get("size") + "----------->" + d.get("id"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            close(searcher, reader);
        }
    }

    /**
     * Closes the searcher and then the reader, tolerating nulls. The reader
     * is closed explicitly because a searcher built from a caller-supplied
     * reader does not close that reader itself.
     */
    private static void close(IndexSearcher searcher, IndexReader reader) {
        try {
            if (searcher != null) {
                searcher.close();
            }
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }
}
FileIndexUtil.java
package org.itat.lucene.util;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds the example index under d:/lucene/files from the files found in
 * d:/lucene/example and exposes the index directory to the search helpers.
 */
public class FileIndexUtil {
    // Shared index directory; stays null if it could not be opened.
    private static Directory directory = null;
    static {
        try {
            directory = FSDirectory.open(new File("d:/lucene/files"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the shared index directory.
     *
     * @return the directory, or {@code null} if opening it failed during
     *         class initialization
     */
    public static Directory getDirectory() {
        return directory;
    }

    /**
     * Creates the index: adds one document per regular file found directly
     * in d:/lucene/example. Subdirectories and other non-regular entries are
     * skipped. Failures are reported with a stack trace; the writer is
     * always closed.
     *
     * @param hasNew whether to delete all existing documents before indexing
     * @author 半仙儿
     */
    public static void index(boolean hasNew) {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(directory, new IndexWriterConfig(
                    Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            if (hasNew) {
                writer.deleteAll();
            }
            File sourceDir = new File("d:/lucene/example");
            File[] files = sourceDir.listFiles();
            if (files == null) {
                // listFiles() returns null when the path is not a readable
                // directory; report that instead of failing with a bare NPE.
                throw new IOException("Cannot list files in "
                        + sourceDir.getAbsolutePath());
            }
            // Random generator for the per-document "score" field.
            Random ran = new Random();
            int index = 0;
            for (File f : files) {
                if (!f.isFile()) {
                    // A FileReader cannot be opened on a directory; skipping
                    // it keeps one odd entry from aborting the whole run.
                    continue;
                }
                // Random score in [0, 600).
                int score = ran.nextInt(600);
                FileReader content = new FileReader(f);
                try {
                    Document doc = new Document();
                    doc.add(new Field("id", String.valueOf(index++),
                            Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
                    doc.add(new Field("content", content));
                    doc.add(new Field("filename", f.getName(), Field.Store.YES,
                            Field.Index.NOT_ANALYZED));
                    doc.add(new Field("path", f.getAbsolutePath(), Field.Store.YES,
                            Field.Index.NOT_ANALYZED));
                    doc.add(new NumericField("date", Field.Store.YES, true)
                            .setLongValue(f.lastModified()));
                    // Clamp instead of letting the int cast wrap around for
                    // files larger than Integer.MAX_VALUE bytes.
                    doc.add(new NumericField("size", Field.Store.YES, true)
                            .setIntValue((int) Math.min(f.length(),
                                    (long) Integer.MAX_VALUE)));
                    // The score is indexed but not stored.
                    doc.add(new NumericField("score", Field.Store.NO, true)
                            .setIntValue(score));
                    writer.addDocument(doc);
                } finally {
                    // Release the file handle even if building or adding the
                    // document failed; closing an already-closed reader is
                    // harmless.
                    content.close();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (writer != null) {
                    writer.close();
                }
            } catch (CorruptIndexException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
FilterAccessor.java
package org.itat.lucene.util;
/**
 * Supplies the settings that MyIDFilter reads when it builds its document
 * set: which field to look at, which term values to look up, and whether the
 * matching documents are kept or dropped.
 */
public interface FilterAccessor {
    /**
     * @return the exact term values to look up in the field named by
     *         {@link #getField()}
     */
    String[] values();

    /**
     * @return the name of the indexed field the values belong to
     */
    String getField();

    /**
     * @return {@code true} if only documents matching one of the values pass
     *         the filter; {@code false} if every document passes except the
     *         matching ones
     */
    boolean set();
}
MyIDFilter.java
package org.itat.lucene.util;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.OpenBitSet;
import org.omg.CORBA.FREE_MEM;
/**
 * Filter that selects documents by exact term values supplied through a
 * {@link FilterAccessor} (project lucene_advance_search). Original author's
 * note: a custom filter like this is needed, for example, when a web site
 * implements a search over promoted products.
 *
 * <p>Depending on {@link FilterAccessor#set()} the filter either lets only
 * the matching documents through, or lets everything through except the
 * matching documents.
 *
 * @author 半仙儿
 * @version V1.0 (2015-04-20 16:26:58)
 */
public class MyIDFilter extends Filter {
    // Source of the field name, the term values and the include/exclude mode.
    private final FilterAccessor accessor;

    /**
     * @param accessor supplies the field, the values and the filter mode
     */
    public MyIDFilter(FilterAccessor accessor) {
        this.accessor = accessor;
    }

    /**
     * Builds the bit set of documents that pass the filter for the given
     * reader.
     *
     * @param reader the reader whose documents are filtered
     * @return a bit set sized to {@code reader.maxDoc()} with one bit set per
     *         accepted document
     * @throws IOException if reading the term postings fails
     */
    @Override
    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
        OpenBitSet obs = new OpenBitSet(reader.maxDoc());
        if (accessor.set()) {
            set(reader, obs);
        } else {
            clear(reader, obs);
        }
        return obs;
    }

    /** Include mode: starts from an empty set and sets the bit of every matching document. */
    private void set(IndexReader reader, OpenBitSet obs) throws IOException {
        applyToMatches(reader, obs, true);
    }

    /** Exclude mode: starts from a full set and clears the bit of every matching document. */
    private void clear(IndexReader reader, OpenBitSet obs) throws IOException {
        // Fill the set first; matching documents are removed afterwards.
        obs.set(0, reader.maxDoc());
        applyToMatches(reader, obs, false);
    }

    /**
     * Walks the postings of every accessor value and sets or clears the bit
     * of each document that contains the term. All documents of a term are
     * visited, so values that occur in several documents are handled too.
     */
    private void applyToMatches(IndexReader reader, OpenBitSet obs,
            boolean include) throws IOException {
        String[] values = accessor.values();
        if (values == null) {
            // Nothing to look up; leave the bit set as prepared by the caller.
            return;
        }
        String field = accessor.getField();
        for (String value : values) {
            TermDocs tds = reader.termDocs(new Term(field, value));
            try {
                while (tds.next()) {
                    if (include) {
                        obs.set(tds.doc());
                    } else {
                        obs.clear(tds.doc());
                    }
                }
            } finally {
                // Always release the postings enumerator.
                tds.close();
            }
        }
    }
}
SearchTest.java
package org.itat.lucene.util;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
/**
 * Search helpers for the example index: run a query with an optional filter
 * or sort and print up to 50 hits to standard output.
 */
public class SearchTest {
    // Shared reader, reused across searches and reopened only when the index changed.
    // NOTE(review): access to this static field is not synchronized -- confirm
    // the helper is only used from a single thread.
    private static IndexReader reader = null;
    static {
        try {
            reader = IndexReader.open(FileIndexUtil.getDirectory());
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a searcher on the shared reader, opening the reader if needed
     * and swapping in a fresh one when the index has changed.
     *
     * @return a new searcher, or {@code null} if the reader could not be
     *         opened or refreshed (the cause is printed as a stack trace)
     */
    public IndexSearcher getSeacher() {
        try {
            if (reader == null) {
                reader = IndexReader.open(FileIndexUtil.getDirectory());
            } else {
                IndexReader tr = IndexReader.openIfChanged(reader);
                if (tr != null) {
                    // The index changed: release the stale reader.
                    reader.close();
                    reader = tr;
                }
            }
            return new IndexSearcher(reader);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Parses {@code queryStr} against the "content" field, runs it with the
     * optional filter and prints up to 50 hits.
     *
     * @param queryStr query in QueryParser syntax
     * @param filter   filter to apply, or {@code null} for none
     */
    public void searcherByFilter(String queryStr, Filter filter) {
        IndexSearcher searcher = null;
        try {
            searcher = getSeacher();
            if (searcher == null) {
                // getSeacher() has already reported the cause.
                return;
            }
            Query query = newContentParser().parse(queryStr);
            TopDocs tds;
            if (filter != null) {
                tds = searcher.search(query, filter, 50);
            } else {
                tds = searcher.search(query, 50);
            }
            printHits(searcher, tds, false);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
    }

    /**
     * Runs the given query and prints up to 50 hits.
     *
     * @param queryStr the query to run (already built, despite the name)
     */
    public void searcherByQuery(Query queryStr) {
        IndexSearcher searcher = null;
        try {
            searcher = getSeacher();
            if (searcher == null) {
                // getSeacher() has already reported the cause.
                return;
            }
            TopDocs tds = searcher.search(queryStr, 50);
            printHits(searcher, tds, false);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
    }

    /**
     * Parses {@code queryStr} against the "content" field, runs it with the
     * optional sort and prints up to 50 hits, including the document's
     * "score" field value.
     *
     * @param queryStr query in QueryParser syntax
     * @param sort     sort order, or {@code null} for the default ordering
     */
    public void searcherBySort(String queryStr, Sort sort) {
        IndexSearcher searcher = null;
        try {
            searcher = getSeacher();
            if (searcher == null) {
                // getSeacher() has already reported the cause.
                return;
            }
            Query query = newContentParser().parse(queryStr);
            TopDocs tds;
            if (sort != null) {
                tds = searcher.search(query, 50, sort);
            } else {
                tds = searcher.search(query, 50);
            }
            printHits(searcher, tds, true);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(searcher);
        }
    }

    /** Creates a parser for the "content" field using the standard analyzer. */
    private QueryParser newContentParser() {
        return new QueryParser(Version.LUCENE_35, "content",
                new StandardAnalyzer(Version.LUCENE_35));
    }

    /**
     * Prints one line per hit: doc id, score, filename, path, size and the
     * formatted modification date. When {@code withScoreField} is true the
     * value of the document's "score" field is inserted between path and size.
     * NOTE(review): FileIndexUtil indexes "score" with Field.Store.NO, so that
     * value is expected to print as "null" -- confirm whether it should be stored.
     */
    private void printHits(IndexSearcher searcher, TopDocs tds,
            boolean withScoreField) throws IOException {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        for (ScoreDoc sd : tds.scoreDocs) {
            Document d = searcher.doc(sd.doc);
            String separator = withScoreField
                    ? "]-" + d.get("score") + "->"
                    : "]-->";
            System.out.println(sd.doc + ":(" + sd.score + ")["
                    + d.get("filename") + "[" + d.get("path") + separator
                    + d.get("size") + "----"
                    + sdf.format(new Date(Long.parseLong(d.get("date"))))
                    + "]");
        }
    }

    /** Closes the searcher if present; the shared reader stays open for reuse. */
    private void closeQuietly(IndexSearcher searcher) {
        if (searcher == null) {
            return;
        }
        try {
            searcher.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}