package chapter5;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexCommitPoint;
import org.apache.lucene.store.LockObtainFailedException;
public class LuceneIndexTest {

    /** Directory on disk where the index files are written. */
    private static String dest_Index_Path = "D:\\workshop\\TextIndex";
    /** Values for the untokenized "id" field, one per record. */
    static protected String[] keywords = { "001", "002", "003" };
    /** Values for the tokenized "content" field, parallel to {@link #keywords}. */
    static protected String[] textdetail = { "记录一", "记录二", "记录三" };

    /**
     * Builds a small Lucene index (Lucene 2.3 API): creates one document per
     * keyword/detail pair, adds it to the index, then optimizes and closes
     * the writer, printing the elapsed time in milliseconds.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Date start = new Date();
        Analyzer textAnalyzer = new SimpleAnalyzer();
        try {
            // Third argument: true means (re)create the index, deleting any
            // existing files; false means update the existing index.
            IndexWriter textIndex = new IndexWriter(dest_Index_Path,
                    textAnalyzer, true);
            for (int i = 0; i < 3; i++) {
                Document document = new Document();
                Field field_id = new Field("id", keywords[i], Field.Store.YES,
                        Field.Index.UN_TOKENIZED);
                document.add(field_id);
                Field field_content = new Field("content", textdetail[i],
                        Field.Store.YES, Field.Index.TOKENIZED);
                document.add(field_content);
                // BUG FIX: the original never added the document to the
                // writer, so the resulting index contained no documents.
                textIndex.addDocument(document);
            }
            textIndex.optimize(); // merge segments for faster searching
            textIndex.close();    // flush the index to disk
            Date end = new Date();
            long index_tm = end.getTime() - start.getTime();
            System.out.println("Total Time :(ms)");
            System.out.println(index_tm);
            // BUG FIX: only report success when no exception was thrown
            // (the original printed it unconditionally after the catches).
            System.out.println("Index Success!");
        } catch (CorruptIndexException e) {
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
注意,测试代码的版本是lucene2.3.jar
A
IndexWriter
textIndex = new IndexWriter(dest_Index_Path,
textAnalyzer, true);//true or false means create or update
表示的是索引创建器,3个参数是,路径,分析器,是否重建。第3个参数为true,表示重新建立索引(假若存在则删除原文件),假若为false,那么在原来的基础上更改,这就是创建增量索引。
还有一个构造函数参数是一个目录,可以用以下方法取得。
Directory dir=FSDirectory.getDirectory(dir_name);
Analyzer textAnalyzer=new StandardAnalyzer();
IndexWriter indexWriter=new IndexWriter(dir,textAnalyzer,false);
B Analyzer textAnalyzer = new SimpleAnalyzer();
Analyzer是索引分析器,每个域的数据在添加时都会使用它来进行分析。
C
Document
document = new Document();
Field
field_id = new Field("id", keywords[i], Field.Store.YES,
Field.Index.UN_TOKENIZED
);
这里的Document并不是真正意义上的文档,而是一个抽象的概念,可以理解为一个要被索引的内容的容器,是由不同的Field组成的Document。
D 另外就是索引管理器IndexReader ,用来管理索引的强大工具。可以用它来删除索引
File indexDir=new File("D:\\workshop\\TextIndex");
try {
IndexReader indexReader=IndexReader.open(indexDir);
//准备索引文件的目录,生成对象读取索引内容
Term term=new Term("name","xx.txt");//创建要删除的对象的索引项的表示(Term)
indexReader.deleteDocuments
(term);//删除符合索引项的文档
indexReader.close
();//关闭,实现物理删除
} catch (CorruptIndexException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
下面贴一个给文本文件建立索引的代码,以供参考:
package chapter5;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
public class LunceneIndexManager {

    /** Directory on disk where the index is stored. */
    private static String dest_Index_Path = "D:\\workshop\\TextIndex";
    /** Path of the text file to be indexed. */
    private static String text_File_Path = "D:\\largeData\\xx.txt";

    /**
     * Indexes a single text file (Lucene 2.3 API): stores the file name in
     * an untokenized "name" field and the full file contents in a tokenized
     * "content" field, appending the document to the existing index
     * (third IndexWriter argument is false = incremental update).
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        Date start = new Date();
        File file = new File(text_File_Path);
        try {
            String dir_name = dest_Index_Path;
            Directory dir = FSDirectory.getDirectory(dir_name);
            Analyzer textAnalyzer = new StandardAnalyzer();
            IndexWriter indexWriter = new IndexWriter(dir, textAnalyzer, false);
            Document document = new Document();
            Field field_name = new Field("name", file.getName(),
                    Field.Store.YES, Field.Index.UN_TOKENIZED);
            document.add(field_name);
            // BUG FIX: the original relied on InputStream.available() and a
            // single read() call, neither of which is guaranteed to cover the
            // whole file; read in a loop until EOF instead. The unused (and
            // never closed) FileReader has also been removed.
            InputStream inputStream = new FileInputStream(file);
            byte[] by;
            try {
                java.io.ByteArrayOutputStream buf =
                        new java.io.ByteArrayOutputStream();
                byte[] chunk = new byte[8192];
                int n;
                while ((n = inputStream.read(chunk)) != -1) {
                    buf.write(chunk, 0, n);
                }
                by = buf.toByteArray();
            } finally {
                inputStream.close(); // close even if reading fails
            }
            // NOTE(review): decodes with the platform default charset, as the
            // original did — confirm this matches the file's actual encoding.
            String content = new String(by);
            Field field_content = new Field("content", content,
                    Field.Store.YES, Field.Index.TOKENIZED);
            document.add(field_content);
            indexWriter.addDocument(document);
            indexWriter.optimize(); // merge segments for faster searching
            indexWriter.close();    // flush the index to disk
            Date end = new Date();
            long ss = end.getTime() - start.getTime();
            System.out.println("Total Time:(ms)");
            System.out.println(ss);
            // BUG FIX: typo "Sucess" corrected, and only printed on success
            // (the original printed it unconditionally after the catch).
            System.out.println("Index Success!");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }
}