Lucene3.5 之索引删除和更新

最新推荐文章于 2023-05-22 21:35:28 发布

doymm2008

最新推荐文章于 2023-05-22 21:35:28 发布

阅读量3.4k

点赞数 1

分类专栏： Java技术文章标签： lucene null 文档 merge 存储 email

Java技术专栏收录该内容

68 篇文章 5 订阅

订阅专栏

package com.ethan.index;
import java.io.File;
import java.io.IOException;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
public class IndexUtil {
private String[] ids = {"1","2","3","4","5","6"};
private String[] emails = {"11@qq.com","22@qq.com","33@126.com","43@yahoo.cn","54@gmail.com","65@qq.com"};
private String[] contents = {
"welcome to nba hot",
"my name is ethan",
"someone like you ",
"rolling in the deep, you like",
"i like fast........",
"l like sports"
};
private int[] attachs = {2,3,1,5,4,6};
private String[] names = {"ethan","sara","michael","wade","lin","paul"};
private Directory directory = null;
public IndexUtil() {
try {
directory = FSDirectory.open(new File("C:\\Users\\ETHAN\\workspace\\hellolucene\\index02"));
} catch (IOException e) {
e.printStackTrace();
}
}
public void index() {
IndexWriter writer = null;
try {
writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)) );
Document doc = null;
for(int i=0;i<ids.length;i++) {
doc = new Document();
doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));
doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.ANALYZED_NO_NORMS));
writer.addDocument(doc);
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if(writer!=null) {
try {
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public void query() {
try {
IndexReader reader = IndexReader.open(directory);
//被存储的
System.out.println("numDocs: "+reader.numDocs());
//文档总量
System.out.println("maxDocs: "+reader.maxDoc());
//删除的文档
System.out.println("deleteDocs: "+reader.numDeletedDocs());;
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public void delete() {
IndexWriter writer = null;
try {
writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
//参数是一个选项，可以是一个Query,也可以是一个term,term是一个精确查找的值
//这里删除id=1的文档，还会留在”回收站“。xxx.del
writer.deleteDocuments(new Term("id","1"));
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if(writer!=null) {
try {
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public void undelete() {
//使用IndexReader进行恢复
IndexReader reader = null;
try {
//set readOnly=false
reader = IndexReader.open(directory,false);
reader.undeleteAll();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if(reader!=null) {
try {
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public void forceDelete() {
IndexWriter writer = null;
try {
writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
//强制优化，del文件就没了，回收站清空
writer.forceMergeDeletes();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if(writer!=null) {
try {
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
/*
* 自己手动merge
* 多次创建索引，文件会增多，
* 比如 5次的话，5个id=1的
*
* merge后合并为n段
*/
public void merge() {
IndexWriter writer = null;
try {
writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
//将索引合并为2段，这两段中的del文件会被清空
//3.5后不建议使用，开销大，lucene会根据情况自动处理
writer.forceMerge(2);
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if(writer!=null) {
try {
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
/*
* 更新操作
*/
public void update() {
IndexWriter writer = null;
try {
writer = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35)));
//lucene没有提供更新方法，这里操作分为两步
//匹配后删除和添加新的
Document doc = new Document();
doc.add(new Field("id","11",Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("email",emails[0],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("content",contents[0],Field.Store.NO,Field.Index.ANALYZED));
doc.add(new Field("name",names[0],Field.Store.YES,Field.Index.ANALYZED_NO_NORMS));
writer.updateDocument(new Term("id","1"),doc);
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if(writer!=null) {
try {
writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}