原文博客:http://blog.csdn.net/tianlincao/article/details/6867127
下面贴出我练习的代码以及一些修改和注释说明:
测试用的版本是 Lucene 2.3.0;Lucene 1.4 以及 Lucene 3.x 及以上版本的 API 有所不同,在这些版本上运行时需要相应修改下面的一些方法。
TextFileIndexer:
/**
 * Builds a Lucene index from the {@code .txt} files located directly inside
 * {@code F:\Analyzer\Resources} and writes it to {@code F:\Analyzer\Index}.
 *
 * <p>Each file becomes one document with two fields: {@code path} (stored, not indexed)
 * and {@code body} (stored, tokenized, with term vectors including positions and offsets).
 */
public class TextFileIndexer {

    /**
     * Indexes every regular file whose name ends with {@code .txt}, then optimizes the index.
     *
     * @param args ignored
     * @throws Exception if the resource directory cannot be listed, a file cannot be read,
     *                   or the index cannot be written
     */
    public static void main(String[] args) throws Exception {
        File fileDir = new File("F:\\Analyzer\\Resources");
        File indexDir = new File("F:\\Analyzer\\Index");

        // listFiles() returns null when the path is not a directory or cannot be read.
        // Check before opening the writer so an existing index is not overwritten for nothing.
        File[] textFiles = fileDir.listFiles();
        if (textFiles == null) {
            throw new IOException("Cannot list files in directory: " + fileDir.getPath());
        }

        Analyzer luceneAnalyzer = new StandardAnalyzer();
        // true: create a new index, overwriting any index already present in indexDir
        IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer, true);
        try {
            for (int i = 0; i < textFiles.length; i++) {
                File textFile = textFiles[i];
                if (!textFile.isFile() || !textFile.getName().endsWith(".txt")) {
                    continue;
                }
                System.out.println("File " + textFile.getCanonicalPath() + "正在被索引");
                String body = FileReaderAll(textFile.getCanonicalPath(), "UTF-8");
                System.out.println(body);

                Document document = new Document();
                // Two fields per document: the file path and the file content.
                Field fieldPath = new Field("path", textFile.getPath(),
                        Field.Store.YES, Field.Index.NO);
                Field fieldBody = new Field("body", body, Field.Store.YES,
                        Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                document.add(fieldPath);
                document.add(fieldBody);
                // Add the document to the index.
                indexWriter.addDocument(document);
            }
            // Merge and optimize the index segments.
            indexWriter.optimize();
        } finally {
            // Close the writer even if indexing failed part-way.
            indexWriter.close();
        }
    }

    /**
     * Reads a whole text file into a string.
     *
     * <p>Lines are joined with a single {@code '\n'} so that the last word of one line and
     * the first word of the next remain separate tokens when the text is analyzed.
     *
     * @param fileName path of the file to read
     * @param charset  name of the character encoding used to decode the file
     * @return the file content, with line terminators normalized to {@code '\n'} and no
     *         trailing terminator; an empty string for an empty file
     * @throws IOException if the file cannot be opened or read, or the charset is unsupported
     */
    public static String FileReaderAll(String fileName, String charset) throws IOException {
        FileInputStream in = new FileInputStream(fileName);
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset));
            StringBuilder content = new StringBuilder();
            String line;
            boolean first = true;
            while ((line = reader.readLine()) != null) {
                if (!first) {
                    content.append('\n');
                }
                content.append(line);
                first = false;
            }
            return content.toString();
        } finally {
            // Closing the underlying stream releases the file handle on every path,
            // including an unsupported charset or a read failure.
            in.close();
        }
    }
}
TestQuery:
/**
 * Runs a single query against the {@code body} field of the index stored in
 * {@code F:\Analyzer\Index} and reports the number of hits.
 */
public class TestQuery {

    /**
     * Parses the query string with a {@code StandardAnalyzer}, searches the index and prints
     * the hit count when at least one document matches.
     *
     * @param args ignored
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *                   or the search fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): "is" looks like a common English stop word; if the analyzer drops it
        // this query may match nothing — confirm against the analyzer's stop-word list.
        String queryString = "is";
        Analyzer analyzer = new StandardAnalyzer();

        IndexSearcher searcher = new IndexSearcher("F:\\Analyzer\\Index");
        try {
            // A parse failure now propagates instead of being printed and followed by a
            // search with a null query.
            QueryParser qp = new QueryParser("body", analyzer);
            Query query = qp.parse(queryString);

            Hits hits = searcher.search(query);
            if (hits.length() > 0) {
                System.out.println("找到:" + hits.length() + " 个结果!");
            }
        } finally {
            // Release the index files held by the searcher.
            searcher.close();
        }
    }
}
StandardAnalyzerTest:
/**
 * Prints the tokens that a {@code StandardAnalyzer} produces for a fixed sample sentence,
 * one numbered line per token.
 */
public class StandardAnalyzerTest {

    /** Creates an instance; the class holds no state. */
    public StandardAnalyzerTest() {
    }

    /**
     * Tokenizes the sample text and prints each token's term text with a running counter.
     * Any exception raised while reading tokens is printed rather than propagated.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Analyzer analyzer = new StandardAnalyzer();
        TokenStream tokens = analyzer.tokenStream(
                "name", new StringReader("lighter javaeye com is the are on"));
        try {
            int count = 0;
            // next() returns null once the stream is exhausted.
            for (Token token = tokens.next(); token != null; token = tokens.next()) {
                count++;
                System.out.println("第" + count + "行,结果:" + token.termText());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
IndexSort:
/**
 * Creates a small index in {@code F:\Analyzer\indexStroe} containing seven documents,
 * each with a single stored, tokenized {@code sort} field holding a one-digit number.
 */
public class IndexSort {

    /**
     * Writes the sample documents, overwriting any index already present in the directory.
     *
     * @param args ignored
     * @throws Exception if the index cannot be created or a document cannot be added
     */
    public static void main(String[] args) throws Exception {
        // Values are deliberately out of numeric order so that sorting is observable.
        String[] sortValues = {"1", "4", "3", "5", "9", "6", "7"};

        // true: create a new index, replacing an existing one
        IndexWriter writer = new IndexWriter("F:\\Analyzer\\indexStroe",
                new StandardAnalyzer(), true);
        try {
            for (int i = 0; i < sortValues.length; i++) {
                Document doc = new Document();
                // NOTE(review): Lucene's sorting documentation asks for sort fields to be
                // indexed without tokenization; confirm TOKENIZED is intended before
                // indexing values that could split into several terms.
                doc.add(new Field("sort", sortValues[i], Field.Store.YES, Field.Index.TOKENIZED));
                writer.addDocument(doc);
            }
        } finally {
            // Close the writer even when adding a document fails.
            writer.close();
        }
    }
}
MyScoreDocComparator:
/**
 * Orders search hits by the integer value stored in a given field.
 *
 * <p>The stored value of every live document is read once at construction time and kept in
 * an array indexed by document id, so comparisons are plain array look-ups.
 */
public class MyScoreDocComparator implements ScoreDocComparator {

    /** Sort key per document id; 0 for deleted documents and documents without the field. */
    private final int[] sort;

    /**
     * Loads the sort key of every document in the reader.
     *
     * @param reader    reader over the index being searched
     * @param fieldName name of the stored field whose value is parsed as an integer
     * @throws IOException           if a document cannot be loaded
     * @throws NumberFormatException if a stored value is present but is not a valid integer
     */
    public MyScoreDocComparator(IndexReader reader, String fieldName) throws IOException {
        sort = new int[reader.maxDoc()];
        for (int i = 0; i < sort.length; i++) {
            // maxDoc() also counts deleted ids, and loading a deleted document fails.
            // Deleted documents never appear in hits, so their key is irrelevant.
            if (reader.isDeleted(i)) {
                continue;
            }
            String value = reader.document(i).get(fieldName);
            // A document without the field keeps the default key 0 instead of failing
            // on a null value.
            if (value != null) {
                sort[i] = Integer.parseInt(value);
            }
        }
    }

    /**
     * Compares two hits by their sort keys.
     *
     * @return a negative number, zero or a positive number when the key of {@code i} is
     *         less than, equal to or greater than the key of {@code j}
     */
    @Override
    public int compare(ScoreDoc i, ScoreDoc j) {
        int left = sort[i.doc];
        int right = sort[j.doc];
        if (left > right) {
            return 1;
        }
        if (left < right) {
            return -1;
        }
        return 0;
    }

    /**
     * Returns the sort key of the given hit as an {@code Integer}.
     */
    @Override
    public Comparable sortValue(ScoreDoc scoreDoc) {
        return Integer.valueOf(sort[scoreDoc.doc]);
    }

    /**
     * Reports the key type as {@code SortField.INT}.
     */
    @Override
    public int sortType() {
        return SortField.INT;
    }
}
MySortComparatorSource:
/**
 * Supplies {@code MyScoreDocComparator} instances for the {@code sort} field.
 */
public class MySortComparatorSource implements SortComparatorSource {

    /**
     * Creates a comparator for the requested field.
     *
     * @param indexReader reader over the index being searched
     * @param fieldName   name of the field to sort on
     * @return a {@code MyScoreDocComparator} when {@code fieldName} is {@code "sort"},
     *         otherwise {@code null}
     * @throws IOException if the comparator cannot read the index
     */
    @Override
    public ScoreDocComparator newComparator(IndexReader indexReader, String fieldName)
            throws IOException {
        // Only the "sort" field is supported; every other field yields no comparator.
        if (!fieldName.equals("sort")) {
            return null;
        }
        return new MyScoreDocComparator(indexReader, fieldName);
    }
}
SearchSort:
/**
 * Demonstrates two searches against the index in {@code F:\Analyzer\indexStroe}:
 * a plain term query and a range query whose hits are ordered by a custom comparator.
 */
public class SearchSort {

    /**
     * Searches the {@code sort} field for the value {@code 4} and prints the matches.
     *
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *                   or the search fails
     */
    public static void searchSort1() throws Exception {
        IndexSearcher searcher = new IndexSearcher("F:\\Analyzer\\indexStroe");
        try {
            QueryParser qp = new QueryParser("sort", new StandardAnalyzer());
            Query query = qp.parse("4");
            printHits(searcher.search(query));
        } finally {
            searcher.close();
        }
    }

    /**
     * Searches the {@code sort} field for values from {@code 1} to {@code 9} and prints the
     * matches ordered by {@code MySortComparatorSource}.
     *
     * @throws Exception if the index cannot be opened or the search fails
     */
    public static void searchSort2() throws Exception {
        IndexSearcher searcher = new IndexSearcher("F:\\Analyzer\\indexStroe");
        try {
            // true: the range is inclusive, so both the lower term "1" and the upper
            // term "9" are part of the range.
            Query query = new RangeQuery(new Term("sort", "1"), new Term("sort", "9"), true);
            // Other SortField constructors:
            // public SortField (String field, boolean reverse) // sort by the named field; reverse == false means ascending
            // public SortField (String field, int type)
            // public SortField (String field, int type, boolean reverse)
            // public SortField (String field, Locale locale)
            // public SortField (String field, Locale locale, boolean reverse)
            // public SortField (String field, SortComparatorSource comparator)
            // public SortField (String field, SortComparatorSource comparator, boolean reverse)
            Sort sort = new Sort(new SortField("sort", new MySortComparatorSource()));
            printHits(searcher.search(query, sort));
        } finally {
            searcher.close();
        }
    }

    /**
     * Prints the hit count followed by the stored {@code sort} value of every hit.
     * Must be called while the searcher that produced the hits is still open, because
     * documents are fetched from it here.
     */
    private static void printHits(Hits hits) throws Exception {
        System.out.println("有" + hits.length() + "个结果");
        for (int i = 0; i < hits.length(); i++) {
            System.out.println(hits.doc(i).get("sort"));
        }
    }

    /**
     * Runs both demonstrations in order.
     *
     * @param args ignored
     * @throws Exception if either search fails
     */
    public static void main(String[] args) throws Exception {
        searchSort1();
        searchSort2();
    }
}
结果:
有1个结果
4
有7个结果
1
3
4
5
6
7
9