导读:
Lucene之开山篇
功能: 实现为本地磁盘文件(文本类型文件)建索引,并提供查询功能!
说明: Lucene 2.0.0 (http://lucene.apache.org/)
作者: 寒江风
日期: 2006-08-08
㊣ Constants
package edu.whu.mylucene;
/**
 * Shared constants for the indexing and search examples.
 *
 * @author bbflyerwww
 * @date 2006-08-08
 */
public class Constants {
    /** Root directory whose files are to be indexed. */
    public static final String INDEX_FILE_PATH = "E://FILES";

    /** Directory in which the Lucene index is stored. */
    public static final String INDEX_STORE_PATH = "E://INDEX";
}
㊣ ExIndexer
package edu.whu.mylucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
*为本地磁盘目录的文件建索引
* @authorbbflyerwww
* @date2006-08-08
*/
public classExIndexer {
// 索引器
privateIndexWriter writer= null
publicExIndexer() {
}
public voidinit() {
// 初始化索引器
try{
Analyzer analyzer = newStandardAnalyzer();
writer= newIndexWriter(Constants.INDEX_STORE_PATH, analyzer, true);
writer.setMergeFactor(50);
writer.setMaxMergeDocs(5000);
} catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
public voidindex(File file) {
// 为File建索引(文件夹文件)
if(!file.exists()) {
return
}
try{
if(file.isDirectory()) {
if(!file.canRead()) {
return
}
File[] files = file.listFiles();
for( inti = 0; i index(files[i]);
}
} else{
if(file.isHidden() || !file.canRead()) {
return
}
System.out.println("Indexing "+ file.getPath());
String path = file.getCanonicalPath();
String date = newDate(file.lastModified()).toString();
Reader reader = newBufferedReader( newInputStreamReader( newFileInputStream(file)));
Document doc = newDocument();
doc.add( newField("path", path, Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add( newField("modified", date, Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add( newField("contents", reader));
writer.addDocument(doc);
}
} catch(IOException ioe) {
ioe.printStackTrace();
}
}
public voidclose() {
// 优化索引关闭索引器
try{
writer.optimize();
writer.close();
} catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
public static voidmain(String[] args) {
longstart = System.currentTimeMillis();
ExIndexer ex = newExIndexer();
ex.init();
ex.index( newFile(Constants.INDEX_FILE_PATH));
ex.close();
longend = System.currentTimeMillis();
System.out.println("Cost : "+ (end - start) + "ms");
}
}
㊣ ExSearcher
package edu.whu.mylucene;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
/**
*检索工具
* @authorbbflyerwww
* @date2006-08-08
*/
public classExSearcher {
// 索引查询器
privateIndexSearcher searcher= null
// 索引查询解析器
privateQueryParser parser= null
publicExSearcher() {
}
public voidinit() {
// 初始化查询器和查询解析器
try{
IndexReader reader = IndexReader.open(Constants.INDEX_STORE_PATH);
searcher= newIndexSearcher(reader);
Analyzer analyzer = newStandardAnalyzer();
parser= newQueryParser("contents", analyzer);
} catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
publicHits search(String keyword) {
// 查询关键字
System.out.println("正在检索关键字: "+ keyword);
Hits hits = null
try{
Query query = parser.parse(keyword);
hits = searcher.search(query);
} catch(IOException ioe) {
ioe.printStackTrace();
} catch(ParseException pe) {
pe.printStackTrace();
}
returnhits;
}
public voidclose() {
// 关闭索引查询器
try{
searcher.close();
} catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
public static voidmain(String[] args) {
ExSearcher ex = newExSearcher();
ex.init();
try{
Hits hits = ex.search("table");
intk = hits.length();
for( inti = 0; i Document doc = hits.doc(i);
System.out.println(i + "/t"+ hits.score(i) + "/t"+ doc.get("path"));
}
} catch(IOException ioe) {
ioe.printStackTrace();
}
ex.close();
}
}
Trackback: http://tb.blog.csdn.net/TrackBack.aspx?PostId=1038783
本文转自
http://blog.csdn.net/bbflyerwww/archive/2006/08/08/1038783.aspx
Lucene之开山篇
功能: 实现为本地磁盘文件(文本类型文件)建索引,并提供查询功能!
说明: Lucene 2.0.0 (http://lucene.apache.org/)
作者: 寒江风
日期: 2006-08-08
㊣ Constants
package edu.whu.mylucene;
/**
 * Shared constants for the indexing and search examples.
 *
 * @author bbflyerwww
 * @date 2006-08-08
 */
public class Constants {
    /** Root directory whose files are to be indexed. */
    public static final String INDEX_FILE_PATH = "E://FILES";

    /** Directory in which the Lucene index is stored. */
    public static final String INDEX_STORE_PATH = "E://INDEX";
}
㊣ ExIndexer
package edu.whu.mylucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
/**
*为本地磁盘目录的文件建索引
* @authorbbflyerwww
* @date2006-08-08
*/
public classExIndexer {
// 索引器
privateIndexWriter writer= null
publicExIndexer() {
}
public voidinit() {
// 初始化索引器
try{
Analyzer analyzer = newStandardAnalyzer();
writer= newIndexWriter(Constants.INDEX_STORE_PATH, analyzer, true);
writer.setMergeFactor(50);
writer.setMaxMergeDocs(5000);
} catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
public voidindex(File file) {
// 为File建索引(文件夹文件)
if(!file.exists()) {
return
}
try{
if(file.isDirectory()) {
if(!file.canRead()) {
return
}
File[] files = file.listFiles();
for( inti = 0; i index(files[i]);
}
} else{
if(file.isHidden() || !file.canRead()) {
return
}
System.out.println("Indexing "+ file.getPath());
String path = file.getCanonicalPath();
String date = newDate(file.lastModified()).toString();
Reader reader = newBufferedReader( newInputStreamReader( newFileInputStream(file)));
Document doc = newDocument();
doc.add( newField("path", path, Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add( newField("modified", date, Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add( newField("contents", reader));
writer.addDocument(doc);
}
} catch(IOException ioe) {
ioe.printStackTrace();
}
}
public voidclose() {
// 优化索引关闭索引器
try{
writer.optimize();
writer.close();
} catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
public static voidmain(String[] args) {
longstart = System.currentTimeMillis();
ExIndexer ex = newExIndexer();
ex.init();
ex.index( newFile(Constants.INDEX_FILE_PATH));
ex.close();
longend = System.currentTimeMillis();
System.out.println("Cost : "+ (end - start) + "ms");
}
}
㊣ ExSearcher
package edu.whu.mylucene;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
/**
*检索工具
* @authorbbflyerwww
* @date2006-08-08
*/
public classExSearcher {
// 索引查询器
privateIndexSearcher searcher= null
// 索引查询解析器
privateQueryParser parser= null
publicExSearcher() {
}
public voidinit() {
// 初始化查询器和查询解析器
try{
IndexReader reader = IndexReader.open(Constants.INDEX_STORE_PATH);
searcher= newIndexSearcher(reader);
Analyzer analyzer = newStandardAnalyzer();
parser= newQueryParser("contents", analyzer);
} catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
publicHits search(String keyword) {
// 查询关键字
System.out.println("正在检索关键字: "+ keyword);
Hits hits = null
try{
Query query = parser.parse(keyword);
hits = searcher.search(query);
} catch(IOException ioe) {
ioe.printStackTrace();
} catch(ParseException pe) {
pe.printStackTrace();
}
returnhits;
}
public voidclose() {
// 关闭索引查询器
try{
searcher.close();
} catch(IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
}
public static voidmain(String[] args) {
ExSearcher ex = newExSearcher();
ex.init();
try{
Hits hits = ex.search("table");
intk = hits.length();
for( inti = 0; i Document doc = hits.doc(i);
System.out.println(i + "/t"+ hits.score(i) + "/t"+ doc.get("path"));
}
} catch(IOException ioe) {
ioe.printStackTrace();
}
ex.close();
}
}
Trackback: http://tb.blog.csdn.net/TrackBack.aspx?PostId=1038783
本文转自
http://blog.csdn.net/bbflyerwww/archive/2006/08/08/1038783.aspx