初次接触Lucene,一步一步来,要实现全文检索,第一步就得先创建索引,请直接看代码:
package cn.com.alei.lucene;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene (3.0) full-text index over the .txt files found in a
 * source directory, writing the index to a separate index directory.
 *
 * @author alei
 * @version created 2012-4-14 03:19:09 PM
 */
public class Indexer {
/**
 * Indexes every .txt file in the source directory and reports elapsed time.
 *
 * @param args unused
 * @throws CorruptIndexException if the existing index is corrupt
 * @throws LockObtainFailedException if the index write lock is held by another writer
 * @throws IOException on any file or index I/O failure
 */
public static void main(String[] args) throws CorruptIndexException, LockObtainFailedException, IOException {
// Directory containing the .txt files to be indexed.
File path = new File("D:\\framework\\luceneDir");
// Directory where the Lucene index files will be written.
File fileindex = new File("D:\\framework\\luceneIndex");
// Filesystem-backed Directory implementation for the index.
Directory directory = new SimpleFSDirectory(fileindex);
// StandardAnalyzer handles tokenizing, lower-casing and stop-word removal.
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
// create=true rebuilds the index from scratch (false would append/modify);
// MaxFieldLength.UNLIMITED places no cap on tokens indexed per field.
IndexWriter indexwriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
try {
File[] files = path.listFiles();
// listFiles() returns null when the directory is missing or unreadable;
// the original code would have thrown a NullPointerException here.
if (files == null) {
System.out.println("Source directory does not exist or is not readable: " + path);
return;
}
long startTime = new Date().getTime();
// Add one Document per .txt file to the index.
for (File file : files) {
if (file.isFile() && file.getName().endsWith(".txt")) {
System.out.println("File: " + file.getCanonicalPath() + " 正在被索引...");
String fileContent = readFile(file, "GB2312");
System.out.println(fileContent);
Document document = new Document();
// Content is analyzed for full-text search but not stored (saves space).
Field fieldcontent = new Field("content", fileContent, Store.NO, Index.ANALYZED);
// FIX: the path must be STORED (was Store.NO) so search hits can report
// which file matched; NOT_ANALYZED keeps it as a single exact-match token.
Field filedpath = new Field("path", file.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED);
document.add(fieldcontent);
document.add(filedpath);
indexwriter.addDocument(document);
}
}
// Merge index segments for faster searching.
indexwriter.optimize();
long endTime = new Date().getTime();
System.out.println("花费了 :" + (endTime - startTime) + "毫秒把文档加到索引中去!!!");
} finally {
// Always release the index write lock, even if indexing fails part-way.
indexwriter.close();
}
}
/**
 * Reads an entire text file into a String using the given charset.
 *
 * @param file    the file to read
 * @param charset the character encoding of the file (e.g. "GB2312")
 * @return the file contents, one '\n' appended after each line
 * @throws IOException if the file cannot be opened or read
 */
public static String readFile(File file, String charset) throws IOException {
BufferedReader bufferedReader = new BufferedReader(
new InputStreamReader(
new FileInputStream(file), charset));
try {
StringBuilder sb = new StringBuilder();
String str;
while ((str = bufferedReader.readLine()) != null) {
// FIX: append a separator — readLine() strips the terminator, and the
// original glued the last word of each line to the first word of the next,
// corrupting the tokens the analyzer sees.
sb.append(str).append('\n');
}
return sb.toString();
} finally {
// Close the reader even if reading throws.
bufferedReader.close();
}
}
}