第二章:Lucene 构建索引(新增、查询、更新、删除)

package lucene2;


import java.io.IOException;


import junit.framework.TestCase;


import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;


/**
 * Basic index-building tests for Lucene 3.5: add, search, update and delete documents.
 *
 * <p>Every test runs against a fresh in-memory index that holds one document per entry of
 * {@link #ids}. The index is built in {@link #setUp()} and released in {@link #tearDown()}.
 *
 * <p>All writers, readers and searchers are closed in {@code finally} blocks so that a
 * failing assertion cannot leave one open (an unclosed writer keeps the index write lock).
 *
 * <p>NOTE(review): the class extends {@link TestCase} (JUnit 3 style) while also carrying
 * JUnit 4 {@code @Test} annotations; both are kept as in the original.
 *
 * @author zhangwx
 */
public class IndexTest extends TestCase{
	/** Values of the "id" field (stored, indexed without analysis), one per fixture document. */
	protected String[] ids = {"1","2"};
	/** Values of the "country" field (stored, not indexed). */
	protected String[] unidexed = {"Netherlands","Italy"};
	/** Values of the "contents" field (analyzed, not stored). */
	protected String[] unstored ={"Amsterdam has lots of bridges","Venice has lots of canals"};
	/** Values of the "city" field (analyzed and stored). */
	protected String[] text = {"Amsterdam","Venice"};
	
	/** Index directory shared by all helpers of a single test; recreated for every test. */
	private Directory directory;
	
	/**
	 * Builds a fresh in-memory index containing one document per entry of {@link #ids}.
	 *
	 * @throws Exception if the index cannot be written
	 */
	@Override
	protected void setUp() throws Exception {
		super.setUp();
		directory = new RAMDirectory(); // in-memory index
		IndexWriter writer = getWriter();
		try {
			for (int i = 0; i < ids.length; i++) {
				writer.addDocument(newDocument(ids[i], unidexed[i], unstored[i], text[i]));
			}
			writer.commit();
		} finally {
			// Always close the writer: an open writer keeps holding the index write lock.
			writer.close();
		}
	}
	
	/**
	 * Releases the in-memory index created by {@link #setUp()}.
	 *
	 * @throws Exception if closing the directory fails
	 */
	@Override
	protected void tearDown() throws Exception {
		try {
			if (directory != null) {
				directory.close();
				directory = null;
			}
		} finally {
			super.tearDown();
		}
	}
	
	/**
	 * Creates an {@link IndexWriter} on the test directory using a whitespace analyzer.
	 * The caller is responsible for closing the returned writer.
	 *
	 * @return a new IndexWriter
	 * @throws Exception if the writer cannot be opened
	 */
	private IndexWriter getWriter() throws Exception{
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35));
		return new IndexWriter(directory,config);
	}
	
	/**
	 * Builds a document with the four fields used throughout this test class.
	 *
	 * @param id       value of the "id" field (stored, not analyzed)
	 * @param country  value of the "country" field (stored, not indexed)
	 * @param contents value of the "contents" field (analyzed, not stored)
	 * @param city     value of the "city" field (analyzed and stored)
	 * @return the populated document
	 */
	private static Document newDocument(String id, String country, String contents, String city) {
		Document doc = new Document();
		doc.add(new Field("id",id,Field.Store.YES,Field.Index.NOT_ANALYZED));
		doc.add(new Field("country",country,Field.Store.YES,Field.Index.NO));
		doc.add(new Field("contents",contents,Field.Store.NO,Field.Index.ANALYZED));
		doc.add(new Field("city",city,Field.Store.YES,Field.Index.ANALYZED));
		return doc;
	}
	
	/**
	 * Counts the documents whose given field contains exactly the given term.
	 *
	 * @param filedName    name of the field to search
	 * @param searchString term text to look for
	 * @return total number of matching documents
	 * @throws Exception if the index cannot be read or searched
	 */
	protected int getHitCount(String filedName,String searchString) throws Exception{
		IndexReader reader = IndexReader.open(directory);
		try {
			IndexSearcher searcher = new IndexSearcher(reader);
			try {
				Query query = new TermQuery(new Term(filedName,searchString));
				return hitCount(searcher, query);
			} finally {
				searcher.close();
			}
		} finally {
			reader.close();
		}
	}
	
	/**
	 * Runs the query and returns its total hit count.
	 *
	 * @param searcher searcher to run the query on
	 * @param query    query to execute
	 * @return the {@code totalHits} value of the search result
	 * @throws IOException if the search fails
	 */
	public static int hitCount(IndexSearcher searcher,Query query) throws IOException{
		// Only the total is needed, so a single top hit is requested.
		TopDocs search = searcher.search(query,1);
		return search.totalHits;
	}
	
	
	/**
	 * Verifies that the writer sees every document added in {@link #setUp()}.
	 *
	 * @throws Exception on index access failure
	 */
	@Test
	public void testIndexWriter() throws Exception{
		IndexWriter writer = getWriter();
		try {
			assertEquals(ids.length, writer.numDocs()); // number of documents written
		} finally {
			writer.close();
		}
	}
	
	/**
	 * Verifies the document counts reported by a reader.
	 *
	 * @throws Exception on index access failure
	 */
	@Test
	public void testIndexReader() throws Exception{
		IndexReader reader = IndexReader.open(directory);
		try {
			assertEquals(ids.length, reader.maxDoc());  // maxDoc as reported by the reader
			assertEquals(ids.length, reader.numDocs()); // number of live documents
		} finally {
			reader.close();
		}
	}
	
	/**
	 * Verifies that a term present in every "contents" value matches every document.
	 *
	 * @throws Exception on index access failure
	 */
	@Test
	public void testIndexReader2() throws Exception{
		int hitCount = getHitCount("contents","lots");
		assertEquals(ids.length, hitCount);
		System.out.println(hitCount);
	}
	
	/**
	 * Deletes a document without merging afterwards: the deleted document is still
	 * counted by {@code maxDoc()} and is only flagged as deleted.
	 *
	 * @throws Exception on index access failure
	 */
	@Test
	public void testDeleteBeforeOptimize() throws Exception{
		IndexWriter writer = getWriter();
		try {
			assertEquals(2, writer.numDocs());          // two documents before the delete
			writer.deleteDocuments(new Term("id","1")); // delete the document with id=1
			writer.commit();
			assertTrue(writer.hasDeletions());          // a document is flagged as deleted
			assertEquals(2, writer.maxDoc());           // deleted document is still counted
			assertEquals(1, writer.numDocs());          // one live document remains
		} finally {
			writer.close();
		}
	}
	
	/**
	 * Deletes a document and then merges the index down to one segment: the deleted
	 * document is no longer counted afterwards.
	 *
	 * @throws Exception on index access failure
	 */
	@Test
	public void testDeleteAfterOptimize() throws Exception{
		IndexWriter writer = getWriter();
		try {
			assertEquals(2, writer.numDocs());          // two documents before the delete
			writer.deleteDocuments(new Term("id","1")); // delete the document with id=1
			// forceMerge(1) is the Lucene 3.5 replacement for the deprecated optimize().
			writer.forceMerge(1);
			writer.commit();
			assertFalse(writer.hasDeletions());         // nothing is flagged as deleted any more
			assertEquals(1, writer.maxDoc());           // total document count
			assertEquals(1, writer.numDocs());          // live document count
		} finally {
			writer.close();
		}
	}
	
	
	/**
	 * Updates a document. {@code updateDocument(term, doc)} deletes the documents matching
	 * the term and then adds the new document; here the document with id=1 is replaced.
	 *
	 * @throws Exception on index access failure
	 */
	@Test
	public void testUpdate() throws Exception{
		assertEquals(1, getHitCount("city", "Amsterdam")); // city=Amsterdam exists before the update
		IndexWriter writer = getWriter();
		try {
			Document doc = newDocument("1", "Netherlands", "Den Haag has a lot of musenums", "Haag");
			writer.updateDocument(new Term("id","1"), doc); // replace the document with id=1
		} finally {
			// No explicit commit: the searches below rely on close() making the update visible.
			writer.close();
		}
		assertEquals(0, getHitCount("city", "Amsterdam")); // old city value is gone
		assertEquals(1, getHitCount("city", "Haag"));      // new document (city=Haag) was added
	}
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值