Lucene之开山篇
功能: 实现为本地磁盘文件(文本类型文件)建索引,并提供查询功能!
说明: Lucene 2.0.0 http://lucene.apache.org/
作者: 寒江风
日期: 2006-08-08
㊣ Constants
package edu.whu.mylucene;
/**
 * Shared constant definitions used by the indexing and searching examples.
 *
 * @author bbflyerwww
 * @date 2006 - 08 - 08
 */
public class Constants {

    /** Directory that holds the files to be indexed. */
    public static final String INDEX_FILE_PATH = "E://FILES";

    /** Directory in which the index is stored. */
    public static final String INDEX_STORE_PATH = "E://INDEX";
}
㊣ ExIndexer
package edu.whu.mylucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
 * Builds a Lucene index over the files found under a local directory.
 *
 * <p>Expected call order: {@link #init()}, one or more calls to
 * {@link #index(File)}, then {@link #close()}.
 *
 * @author bbflyerwww
 * @date 2006 - 08 - 08
 */
public class ExIndexer {
    // Index writer; created by init() and released by close().
    private IndexWriter writer = null;

    public ExIndexer() {
    }

    /**
     * Opens the index writer on {@code Constants.INDEX_STORE_PATH} using a
     * {@code StandardAnalyzer} and applies the merge settings.
     *
     * <p>If the writer cannot be opened the stack trace is printed and the
     * JVM exits with status -1.
     */
    public void init() {
        try {
            Analyzer analyzer = new StandardAnalyzer();
            // Third argument is Lucene's "create" flag.
            writer = new IndexWriter(Constants.INDEX_STORE_PATH, analyzer, true);
            writer.setMergeFactor(50);
            writer.setMaxMergeDocs(5000);
        } catch (IOException ioe) {
            ioe.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Indexes a file, or recursively every file below a directory.
     *
     * <p>Non-existent paths, unreadable directories, and hidden or unreadable
     * files are skipped silently. An {@code IOException} raised while
     * indexing a single file is printed and does not stop the traversal.
     *
     * @param file file or directory to index; {@code null} is ignored
     * @throws IllegalStateException if a file has to be indexed before
     *         {@link #init()} has created the writer
     */
    public void index(File file) {
        if (file == null || !file.exists()) {
            return;
        }
        if (file.isDirectory()) {
            indexDirectory(file);
            return;
        }
        try {
            indexFile(file);
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }

    // Recurses into every entry of a readable directory.
    private void indexDirectory(File dir) {
        if (!dir.canRead()) {
            return;
        }
        File[] entries = dir.listFiles();
        // listFiles() returns null when the directory cannot be listed
        // (I/O error, or it disappeared in the meantime).
        if (entries == null) {
            return;
        }
        for (int i = 0; i < entries.length; i++) {
            index(entries[i]);
        }
    }

    // Adds one regular file to the index as a document with the fields
    // "path", "modified" and "contents".
    private void indexFile(File file) throws IOException {
        if (file.isHidden() || !file.canRead()) {
            return;
        }
        if (writer == null) {
            throw new IllegalStateException("init() must be called before index()");
        }
        System.out.println("Indexing " + file.getPath());
        String path = file.getCanonicalPath();
        String date = new Date(file.lastModified()).toString();
        // NOTE(review): the file is decoded with the platform default charset.
        Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
        try {
            Document doc = new Document();
            doc.add(new Field("path", path, Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.add(new Field("modified", date, Field.Store.YES, Field.Index.UN_TOKENIZED));
            doc.add(new Field("contents", reader));
            writer.addDocument(doc);
        } finally {
            // Release the file handle even when adding the document fails;
            // closing an already closed BufferedReader has no effect.
            reader.close();
        }
    }

    /**
     * Optimizes the index and closes the writer. Does nothing when
     * {@link #init()} has not created a writer.
     *
     * <p>On an {@code IOException} the stack trace is printed and the JVM
     * exits with status -1.
     */
    public void close() {
        if (writer == null) {
            return;
        }
        try {
            try {
                writer.optimize();
            } finally {
                // Close the writer even if optimize() fails.
                writer.close();
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Indexes everything under {@code Constants.INDEX_FILE_PATH} and prints
     * the elapsed time in milliseconds.
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        ExIndexer ex = new ExIndexer();
        ex.init();
        try {
            ex.index(new File(Constants.INDEX_FILE_PATH));
        } finally {
            // Always release the writer, even on an unexpected runtime failure.
            ex.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("Cost : " + (end - start) + " ms");
    }
}
㊣ ExSearcher
package edu.whu.mylucene;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
/**
 * Search tool that runs keyword queries against the "contents" field of the
 * index stored at {@code Constants.INDEX_STORE_PATH}.
 *
 * <p>Expected call order: {@link #init()}, one or more calls to
 * {@link #search(String)}, then {@link #close()}.
 *
 * @author bbflyerwww
 * @date 2006 - 08 - 08
 */
public class ExSearcher {
    // Reader backing the searcher. Kept so close() can release it:
    // an IndexSearcher built from a caller-supplied reader does not close it.
    private IndexReader reader = null;
    // Index searcher
    private IndexSearcher searcher = null;
    // Query parser bound to the "contents" field
    private QueryParser parser = null;

    public ExSearcher() {
    }

    /**
     * Opens the index and prepares the searcher and the query parser.
     *
     * <p>If the index cannot be opened the stack trace is printed and the
     * JVM exits with status -1.
     */
    public void init() {
        try {
            reader = IndexReader.open(Constants.INDEX_STORE_PATH);
            searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer();
            parser = new QueryParser("contents", analyzer);
        } catch (IOException ioe) {
            ioe.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Parses the keyword as a query and runs it against the index.
     *
     * @param keyword query text handed to the query parser
     * @return the hits, or {@code null} when parsing or searching failed
     *         (the failure is printed to standard error)
     * @throws IllegalStateException if {@link #init()} has not been called
     */
    public Hits search(String keyword) {
        System.out.println("正在检索关键字 : " + keyword);
        if (searcher == null || parser == null) {
            throw new IllegalStateException("init() must be called before search()");
        }
        Hits hits = null;
        try {
            Query query = parser.parse(keyword);
            hits = searcher.search(query);
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } catch (ParseException pe) {
            pe.printStackTrace();
        }
        return hits;
    }

    /**
     * Closes the searcher and the underlying index reader. Does nothing for
     * parts that {@link #init()} never created.
     *
     * <p>On an {@code IOException} the stack trace is printed and the JVM
     * exits with status -1.
     */
    public void close() {
        try {
            try {
                if (searcher != null) {
                    searcher.close();
                }
            } finally {
                // Release the reader even if closing the searcher failed.
                if (reader != null) {
                    reader.close();
                }
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Searches for "table" and prints rank, score and path of every hit,
     * separated by tabs.
     */
    public static void main(String[] args) {
        ExSearcher ex = new ExSearcher();
        ex.init();
        try {
            Hits hits = ex.search("table");
            // search() returns null when the query failed; nothing to print then.
            if (hits != null) {
                int k = hits.length();
                for (int i = 0; i < k; i++) {
                    Document doc = hits.doc(i);
                    System.out.println(i + "\t" + hits.score(i) + "\t" + doc.get("path"));
                }
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            ex.close();
        }
    }
}