package cn.com.lucene;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
public class IndexUtil {
private String[] ids = {"1","2","3","4","5","6"};
private String[] emails = {"aa@163.com","bb@163.com","cc@163.com","dd@163.com","ee@163.com","ff@163.com"};
private String[] content = {
"Hello1",
"Hello2",
"Hello3",
"Hello4",
"Hello5",
"Hello6"
};
private int[] attachments = {3,2,1,4,5,2};
private String[] names = {"zhangsan","lisi","wangwu","John","Mack","Tom"};
private Directory directory = null;
public IndexUtil(){
try {
directory = FSDirectory.open(new File("d:/lucene/index02"));
} catch (IOException e) {
e.printStackTrace();
}
}
//建立索引
public void index(){
IndexWriter writer = null;
try {
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
writer = new IndexWriter(directory,iwc);
Document doc = null;
/**
* Field.Store.YES或者NO(存储域选项)
* 设置为YES表示把这个域中的内容完全存储在文件中,方便进行文本的还原
* 设置为NO表示把这个域中的内容不存储到文件中,但是可以被索引,此时内容无法还原(无法使用doc.get(XXX))
*/
/**
* Field.Index.*(索引域选项)
* Index.ANALYZED:进行分词和索引,适用于标题、内容等
* Index.NOT_ANALYZED:进行索引,但是不进行分词,如身份证号,姓名,id等,适用于精确搜索
* Index.ANALYZED_NOT_NORMS:进行分词但是不存储norms信息,这个norms中包括了创建索引的时间和权值等信息。
* Index.NOT_ANALYZED_NOT_NORMS:即不进行分词也不存储norms信息
* Index.NO:不进行索引
*/
for(int i=0;i<ids.length;i++){
doc = new Document();
doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("content",content[i],Field.Store.NO,Field.Index.ANALYZED));
doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
writer.addDocument(doc);
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 查询
*/
public void query(){
try {
IndexReader reader = IndexReader.open(directory);
System.out.println("numDocs:" + reader.numDocs());
System.out.println("maxDocs:" + reader.maxDoc());
System.out.println("deleteDocs:" + reader.numDeletedDocs());
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* 删除索引
* @throws Exception
* @throws LockObtainFailedException
* @throws CorruptIndexException
*/
public void delete() throws CorruptIndexException, LockObtainFailedException, Exception{
IndexWriter writer = null;
writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
//参数是一个选项,可以是一个Query,也可以是一个Term(一个精确查找的值)
//此时删除的文档并不会被完全删除,而是存储在一个回收站中的,可以恢复
writer.deleteDocuments(new Term("id","1"));
writer.close();
}
/**
* 恢复
*/
public void recover(){
IndexReader reader = null;
try {
//恢复时必须把IndexReader的readOnly设置为false
reader = IndexReader.open(directory,false);
reader.undeleteAll();
reader.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* forceDelete
*/
public void forceDelete(){
IndexWriter writer = null;
try {
writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
writer.forceMergeDeletes();
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public void update(){
IndexWriter writer = null;
try {
writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
//没有提供更新方法,是两个操作的合集:先删除,再添加
Document doc = new Document();
doc.add(new Field("id","11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("email",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("content",content[0],Field.Store.NO,Field.Index.ANALYZED));
doc.add(new Field("name",names[0],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
writer.updateDocument(new Term("id","1"), doc);
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
// Test code (separate file: TestIndex.java)
package cn.com.lucene;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.LockObtainFailedException;
import org.junit.Test;
/**
 * JUnit drivers for {@link IndexUtil}: each test creates a fresh
 * {@code IndexUtil} and invokes exactly one of its operations.
 */
public class TestIndex {

    /** Runs {@code IndexUtil.index()}. */
    @Test
    public void testIndex() {
        new IndexUtil().index();
    }

    /** Runs {@code IndexUtil.query()}. */
    @Test
    public void testQuery() {
        new IndexUtil().query();
    }

    /**
     * Runs {@code IndexUtil.delete()}.
     *
     * @throws Exception if the delete operation fails
     */
    @Test
    public void testDelete() throws Exception {
        new IndexUtil().delete();
    }

    /** Runs {@code IndexUtil.recover()}. */
    @Test
    public void testRecover() {
        new IndexUtil().recover();
    }

    /** Runs {@code IndexUtil.forceDelete()}. */
    @Test
    public void testForceDelete() {
        new IndexUtil().forceDelete();
    }

    /** Runs {@code IndexUtil.update()}. */
    @Test
    public void testUpdate() {
        new IndexUtil().update();
    }
}