Apache Lucene是一个开放源代码的全文搜寻引擎库,利用它可以轻易地为Java软件加入全文搜寻功能。Lucene最主要的工作是替文件的每一个字作索引,索引让搜寻的效率比传统的逐字比较大大提高。Lucene提供一组解读、过滤、分析文件以及编排和使用索引的API,它的强大之处除了高效和简单外,最重要的是使用者可以随时根据自己的需要自订其功能。
首先在自己的电脑上建几个文本文件供建立索引使用,我一共建了三个文本:
然后编写一个索引类:
package com.lucene.test;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Demo indexer: reads every regular file under {@code d:/lucene/data} as
 * GB2312 text and adds it to the Lucene index stored in {@code d:/lucene/index}.
 *
 * <p>Each file becomes one document with two stored, analyzed fields:
 * {@code fileName} and {@code contents}.
 */
public class TestFileIndex {

    /**
     * Builds (or appends to) the index from the files in the data directory.
     *
     * @param args unused
     * @throws Exception if the data directory cannot be listed, or if reading a
     *                   file or writing the index fails
     */
    public static void main(String[] args) throws Exception {
        String dataDir = "d:/lucene/data";
        String indexDir = "d:/lucene/index";

        // listFiles() returns null (not an empty array) when the path is not a
        // readable directory; fail with a clear message instead of an NPE.
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list data directory: " + dataDir);
        }

        Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_36, true);
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
                for (int i = 0; i < files.length; i++) {
                    File file = files[i];
                    // Sub-directories cannot be opened as a stream; skip them.
                    if (!file.isFile()) {
                        continue;
                    }
                    System.out.println("文件: " + file.getName() + " 索引建立中....");
                    long startTime = System.currentTimeMillis();

                    Document doc = new Document();
                    doc.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
                    doc.add(new Field("contents", readContents(file), Field.Store.YES, Field.Index.ANALYZED));
                    writer.addDocument(doc);

                    long endTime = System.currentTimeMillis();
                    System.out.println("文件: " + file.getName() + "索引建立结束。 所用时间为:" + (endTime - startTime) + "毫秒");
                }
                writer.commit();
            } finally {
                // Always release the writer, even if a file failed.
                writer.close();
            }
        } finally {
            dir.close();
        }
        System.out.println("索引结束");
    }

    /**
     * Reads a whole file as GB2312 text, ending every line with {@code '\n'}.
     *
     * @param file the file to read
     * @return the file contents with normalized line terminators
     * @throws Exception if the file cannot be opened or read
     */
    private static String readContents(File file) throws Exception {
        StringBuilder text = new StringBuilder();
        FileInputStream is = new FileInputStream(file.getCanonicalPath());
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, "gb2312"));
            String line = reader.readLine();
            while (line != null) {
                text.append(line);
                text.append("\n");
                line = reader.readLine();
            }
        } finally {
            // Closing the underlying stream releases the file handle on every path.
            is.close();
        }
        return text.toString();
    }
}
运行完,在index文件夹下会产生索引文件:
再编写一个searcher类,检索index文件夹,作为测试:
package com.lucene.test;
import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Demo searcher: opens the Lucene index stored in {@code d:/lucene/index},
 * runs a parsed query against the {@code contents} field and prints the
 * number of hits followed by the {@code fileName} of each matching document.
 */
public class TestFileSearcher {

    /**
     * Runs the sample query and prints the matching file names.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened, the query cannot be
     *                   parsed, or the search fails
     */
    public static void main(String[] args) throws Exception {
        String indexDir = "d:/lucene/index";
        Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_36, true);

        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            // Reuse the single Directory instead of opening a second one that
            // would never be closed.
            IndexReader reader = IndexReader.open(dir);
            try {
                IndexSearcher searcher = new IndexSearcher(reader);
                try {
                    QueryParser parser = new QueryParser(Version.LUCENE_36, "contents", analyzer);
                    Query query = parser.parse("青元");
                    // Alternative: look up a single term in the file name, e.g.
                    // new TermQuery(new Term("fileName", "test")).

                    TopDocs docs = searcher.search(query, 1000);
                    ScoreDoc[] hits = docs.scoreDocs;
                    System.out.println(hits.length);
                    for (int i = 0; i < hits.length; i++) {
                        Document doc = searcher.doc(hits[i].doc);
                        System.out.print(doc.get("fileName") + "\n");
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                // A searcher built from an existing reader does not close that
                // reader, so it must be closed explicitly.
                reader.close();
            }
        } finally {
            dir.close();
        }
    }
}