今天不练习 Lucene 2.0.1 了,改为练习一下最新版本的 Lucene 3.5。
package jim.Lucene35;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import tool.FileList;
import tool.FileText;
/**
 * Command-line entry point for the Lucene 3.5 indexing exercise.
 */
public class Lucene35 {

    /**
     * Starts the indexing run.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        // The creatIndex constructor performs the whole run, so the instance is not kept.
        new creatIndex();
    }
}
/**
 * Builds a Lucene 3.5 index, stored on disk under {@code indexPath}, from the files that
 * {@code FileList.getFiles("testFiles")} returns. All of the work happens in the constructor.
 *
 * <p>Each file becomes one document with three stored fields: {@code title} (the file name,
 * not analyzed), {@code content} (the text returned by {@code FileText.getText}, analyzed)
 * and {@code path} (the file path, not analyzed).
 *
 * <p>NOTE(review): the writer is opened with the default {@code IndexWriterConfig} open mode,
 * so re-running against an existing index presumably appends duplicates - confirm whether
 * that is intended.
 */
class creatIndex{
    Directory directory = null;// storage backend of the index
    IndexWriter writer = null;// writes documents into the index
    Document document = null;// document currently being built
    Field field = null;// field most recently created
    IndexWriterConfig iwc = null;// Lucene version and analyzer used by the writer
    String indexPath = "index";// location of the index on disk
    String title = "title";// overwritten with the name of each file as it is processed
    String content = "content";// overwritten with the text of each file as it is processed
    String [] files = null;// paths of the files to index

    /**
     * Runs the complete indexing job: opens the index, indexes every listed file, and always
     * releases the writer and directory afterwards. Failures are reported on standard output
     * and end the run early instead of surfacing later as a {@code NullPointerException}.
     */
    public creatIndex(){
        boolean indexed = false;
        try {
            if (openWriter() && loadFileList()) {
                for (int i = 0; i < files.length; i++) {
                    indexFile(new File(files[i]));
                }
                indexed = true;
            }
        } finally {
            // Runs even if indexing throws, so the index write lock is never left behind.
            if (!closeResources()) {
                indexed = false;
            }
        }
        if (indexed) {
            System.out.println("Index is Created!");
        }
    }

    /** Opens the on-disk directory and the index writer; returns false if either step fails. */
    private boolean openWriter() {
        try {
            directory = FSDirectory.open(new File(indexPath));
        } catch (IOException e) {
            System.out.println("创建Directory时发生错误!");
            e.printStackTrace();
            return false;
        }
        iwc = new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35));
        try {
            writer = new IndexWriter(directory,iwc);
        } catch (IOException e) {
            // CorruptIndexException and LockObtainFailedException are IOExceptions and are
            // reported the same way, so a single handler covers all three.
            System.out.println("创建IndexWriter时发生错误!");
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /** Fills {@code files} with the paths to index; returns false if the list is unavailable. */
    private boolean loadFileList() {
        try {
            files = FileList.getFiles("testFiles");
        } catch (IOException e) {
            System.out.println("无法打开存放要搜索的文件的文件夹");
            e.printStackTrace();
            return false;
        }
        if (files == null) {
            // Defensive: FileList is not visible here, so a null result is treated as a failure.
            System.out.println("无法打开存放要搜索的文件的文件夹");
            return false;
        }
        return true;
    }

    /** Adds one file to the index; a failure is reported and does not stop the remaining files. */
    private void indexFile(File file) {
        title = file.getName();
        content = FileText.getText(file);
        if (content == null) {
            // Field does not accept a null value; skip this file rather than abort the run.
            // NOTE(review): whether FileText.getText can return null is not visible here.
            System.out.println("File: "+title+" skipped (no text)");
            return;
        }
        document = new Document();
        field = new Field("title",title,Field.Store.YES,Index.NOT_ANALYZED);
        document.add(field);
        field = new Field("content",content,Field.Store.YES,Index.ANALYZED);
        document.add(field);
        field = new Field("path",file.getPath(),Field.Store.YES,Index.NOT_ANALYZED);
        document.add(field);
        try {
            writer.addDocument(document);
            // Reported only after the add succeeded.
            System.out.println("File: "+title+" Indexed");
        } catch (IOException e) {
            System.out.println("将Document写入IndexWriter时错误!");
            e.printStackTrace();
        }
    }

    /** Closes the writer and then the directory, if they were opened; returns false on any error. */
    private boolean closeResources() {
        boolean closedCleanly = true;
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                System.out.println("IndexWriter关闭时错误!");
                e.printStackTrace();
                closedCleanly = false;
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
                closedCleanly = false;
            }
        }
        return closedCleanly;
    }
}
新知识:
创建索引的步骤:
1.创建Directory
2.创建IndexWriter
3.创建Document
4.为Document添加Field对象
5.将Document添加到IndexWriter中
6.关闭IndexWriter(关闭后索引才会完整写入磁盘)
今天的收获:
能用 Lucene 3.5 写一个索引程序。
今天的不足:
对IK分词器摸不着头绪.