Lucene3.5 之 索引删除和更新

package com.ethan.index;

import java.io.File;
import java.io.IOException;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.FieldOption;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;

/**
 * Demonstrates the basic index maintenance operations of Lucene 3.5 on a small,
 * hard-coded data set: building an index, printing document counts, deleting,
 * undeleting, purging deletions, merging segments and updating a document.
 *
 * <p>Every operation opens its own {@link IndexWriter} or {@link IndexReader},
 * closes it before returning, and reports I/O failures by printing the stack
 * trace instead of propagating them.
 */
public class IndexUtil {
	/** Index location used by the no-argument constructor. */
	private static final String DEFAULT_INDEX_PATH = "C:\\Users\\ETHAN\\workspace\\hellolucene\\index02";

	// Sample data: element i of every array describes the same document.
	private final String[] ids = {"1","2","3","4","5","6"};
	private final String[] emails = {"11@qq.com","22@qq.com","33@126.com","43@yahoo.cn","54@gmail.com","65@qq.com"};
	private final String[] contents = {
			"welcome to nba hot",
			"my name is ethan",
			"someone like you ",
			"rolling in the deep, you like",
			"i like fast........",
			"l like sports"
	};

	// Not written to the index by any method of this class.
	private final int[] attachs = {2,3,1,5,4,6};
	private final String[] names = {"ethan","sara","michael","wade","lin","paul"};

	/** Directory holding the index; stays {@code null} if it could not be opened. */
	private final Directory directory;

	/**
	 * Creates a utility working on the default, hard-coded index location.
	 */
	public IndexUtil() {
		this(new File(DEFAULT_INDEX_PATH));
	}

	/**
	 * Creates a utility working on the index stored in {@code indexDir}.
	 *
	 * <p>If the directory cannot be opened, the stack trace is printed and the
	 * internal directory stays {@code null}; later operations are then handed a
	 * {@code null} directory.
	 *
	 * @param indexDir file-system location of the index
	 */
	public IndexUtil(File indexDir) {
		Directory opened = null;
		try {
			opened = FSDirectory.open(indexDir);
		} catch (IOException e) {
			e.printStackTrace();
		}
		this.directory = opened;
	}

	/**
	 * Adds one document per entry of the sample data to the index.
	 * Running it repeatedly appends the same six documents again.
	 */
	public void index() {
		IndexWriter writer = null;
		try {
			writer = openWriter();
			for (int i = 0; i < ids.length; i++) {
				writer.addDocument(buildDocument(ids[i], emails[i], contents[i], names[i]));
			}
		} catch (IOException e) {
			// CorruptIndexException and LockObtainFailedException are IOExceptions too.
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/**
	 * Prints the live, maximum and deleted document counts of the index.
	 */
	public void query() {
		IndexReader reader = null;
		try {
			reader = IndexReader.open(directory);
			// Documents currently in the index, deleted ones excluded.
			System.out.println("numDocs: "+reader.numDocs());
			// Upper bound of document numbers; still counts deleted documents.
			System.out.println("maxDocs: "+reader.maxDoc());
			// Documents marked as deleted but not yet purged.
			System.out.println("deleteDocs: "+reader.numDeletedDocs());
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// The reader must be closed, otherwise its index files stay open.
			closeReader(reader);
		}
	}

	/**
	 * Deletes every document whose {@code id} field is exactly {@code "1"}.
	 *
	 * <p>{@code deleteDocuments} accepts either a Query or a Term; a Term is an
	 * exact-match value. Per the original author's notes the documents are only
	 * marked in a {@code .del} file (a "recycle bin") and remain restorable.
	 */
	public void delete() {
		IndexWriter writer = null;
		try {
			writer = openWriter();
			writer.deleteDocuments(new Term("id","1"));
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/**
	 * Restores all documents that are marked as deleted, using an IndexReader.
	 */
	public void undelete() {
		IndexReader reader = null;
		try {
			// readOnly=false: the reader has to be allowed to modify the index.
			reader = IndexReader.open(directory,false);
			reader.undeleteAll();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeReader(reader);
		}
	}

	/**
	 * Purges the documents marked as deleted; per the original author's notes
	 * the {@code .del} file disappears and the "recycle bin" is emptied.
	 */
	public void forceDelete() {
		IndexWriter writer = null;
		try {
			writer = openWriter();
			writer.forceMergeDeletes();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/**
	 * Manually merges the index down to two segments.
	 *
	 * <p>Author's notes: building the index several times increases the number
	 * of files (after five runs there are five documents with id=1); merging
	 * combines them into n segments, and deletions inside the merged segments
	 * are dropped. Since 3.5 a forced merge is discouraged because it is
	 * expensive; Lucene merges segments on its own when appropriate.
	 */
	public void merge() {
		IndexWriter writer = null;
		try {
			writer = openWriter();
			writer.forceMerge(2);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/**
	 * Replaces the documents whose {@code id} is {@code "1"} with a single new
	 * document that carries the first sample entry under the id {@code "11"}.
	 *
	 * <p>Lucene has no in-place update: {@code updateDocument} deletes the
	 * matching documents and then adds the new one.
	 */
	public void update() {
		IndexWriter writer = null;
		try {
			writer = openWriter();
			Document doc = buildDocument("11", emails[0], contents[0], names[0]);
			writer.updateDocument(new Term("id","1"),doc);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			closeWriter(writer);
		}
	}

	/** Opens a writer on the index directory with a Lucene 3.5 StandardAnalyzer. */
	private IndexWriter openWriter() throws IOException {
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,new StandardAnalyzer(Version.LUCENE_35));
		return new IndexWriter(directory, config);
	}

	/** Builds a document with the four fields shared by all sample documents. */
	private static Document buildDocument(String id, String email, String content, String name) {
		Document doc = new Document();
		doc.add(new Field("id",id,Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
		doc.add(new Field("email",email,Field.Store.YES,Field.Index.NOT_ANALYZED));
		doc.add(new Field("content",content,Field.Store.NO,Field.Index.ANALYZED));
		doc.add(new Field("name",name,Field.Store.YES,Field.Index.ANALYZED_NO_NORMS));
		return doc;
	}

	/** Closes the writer if one was opened, printing any failure. */
	private static void closeWriter(IndexWriter writer) {
		if (writer == null) {
			return;
		}
		try {
			writer.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Closes the reader if one was opened, printing any failure. */
	private static void closeReader(IndexReader reader) {
		if (reader == null) {
			return;
		}
		try {
			reader.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}
package com.ethan.test;

import org.junit.Test;

import com.ethan.index.IndexUtil;

/**
 * Manual driver for {@link IndexUtil}: each test runs exactly one index
 * operation on a fresh {@code IndexUtil}. No test asserts anything; the
 * counts quoted below are outputs noted by the original author, presumably
 * obtained by running {@code testQuery} afterwards.
 */
public class IndexTest {

	/** Builds (or appends to) the sample index. */
	@Test
	public void testIndex() {
		new IndexUtil().index();
	}

	/**
	 * Prints the document counts.
	 *
	 * <pre>
	 * numDocs: 24
	 * maxDocs: 24
	 * deleteDocs: 0
	 * </pre>
	 */
	@Test
	public void testQuery() {
		new IndexUtil().query();
	}

	/**
	 * Deletes the documents with id=1.
	 *
	 * <pre>
	 * numDocs: 20
	 * maxDocs: 24
	 * deleteDocs: 4 (the four documents with id=1)
	 * </pre>
	 */
	@Test
	public void testDelete() {
		new IndexUtil().delete();
	}

	/**
	 * Restores the deleted documents.
	 *
	 * <pre>
	 * numDocs: 7
	 * maxDocs: 7
	 * deleteDocs: 0
	 * </pre>
	 */
	@Test
	public void testUnDelete() {
		new IndexUtil().undelete();
	}

	/**
	 * Purges the deleted documents.
	 *
	 * <pre>
	 * numDocs: 6
	 * maxDocs: 6(7)
	 * deleteDocs: 0(1)
	 * </pre>
	 */
	@Test
	public void testForceDelete() {
		new IndexUtil().forceDelete();
	}

	/**
	 * Forces a merge. Noted after the merge:
	 *
	 * <pre>
	 * numDocs: 20
	 * maxDocs: 21
	 * deleteDocs: 1
	 * </pre>
	 *
	 * Because the merge was forced down to 2 segments, _0_1.del was not
	 * removed: _0 is the first segment and was left untouched, while the
	 * later segments were merged into one.
	 */
	@Test
	public void testMerge() {
		new IndexUtil().merge();
	}

	/**
	 * Updates the document with id=1 (a delete followed by an add).
	 *
	 * <pre>
	 * numDocs: 6
	 * maxDocs: 7
	 * deleteDocs: 1
	 * </pre>
	 */
	@Test
	public void testUpdate() {
		new IndexUtil().update();
	}
}

索引目录中各文件的含义:


0.fnm: 保存的field的信息,有哪几个字段


0.fdt,0.fdx:  Store.YES的对应字段的值


0.frq:单词出现的频率


0.nrm: 存储评分信息,权重


0.prx: 词项在文档中出现的位置信息


0.tii,0.tis: 存储词典(term)信息,其中 tis 是词典本身,tii 是词典的索引


文档和域的概念:


文档相当于表中的一条记录,域相当于表中每一个字段

optimize() 已被弃用,开销比较大
forceMergeDeletes() 强制把回收站的内容给删掉


当segment比较多时,lucene会自动优化处理


  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值