package lucene2;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
/**
 * Exercises the basic Lucene index operations (add, search, update, delete)
 * against an in-memory index that is rebuilt before every test method.
 *
 * <p>Each fixture document carries four fields:
 * <ul>
 *   <li>{@code id} - stored, indexed as a single un-analyzed token</li>
 *   <li>{@code country} - stored only, not indexed</li>
 *   <li>{@code contents} - analyzed and indexed, not stored</li>
 *   <li>{@code city} - stored, analyzed and indexed</li>
 * </ul>
 *
 * @author zhangwx
 */
public class IndexTest extends TestCase {

    protected String[] ids = {"1", "2"};
    protected String[] unidexed = {"Netherlands", "Italy"};
    protected String[] unstored = {"Amsterdam has lots of bridges", "Venice has lots of canals"};
    protected String[] text = {"Amsterdam", "Venice"};

    /** Index directory shared by all helpers of a single test run. */
    private Directory directory;

    /**
     * Creates a fresh in-memory index and adds one document per entry of {@link #ids}.
     *
     * @throws Exception if the index cannot be created or written
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        directory = new RAMDirectory(); // in-memory index
        IndexWriter writer = getWriter();
        try {
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
                doc.add(new Field("country", unidexed[i], Field.Store.YES, Field.Index.NO));
                doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
                doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            writer.commit();
        } finally {
            // Always close the writer when done with it; a writer left open
            // may cause write-lock errors for the next writer on this directory.
            writer.close();
        }
    }

    /**
     * Releases the in-memory index created by {@link #setUp()}.
     *
     * @throws Exception if closing the directory fails
     */
    @Override
    protected void tearDown() throws Exception {
        try {
            if (directory != null) {
                directory.close();
                directory = null;
            }
        } finally {
            super.tearDown();
        }
    }

    /**
     * Creates an {@link IndexWriter} on the test directory using a whitespace analyzer.
     * The caller is responsible for closing the returned writer.
     *
     * @return a new writer bound to {@link #directory}
     * @throws Exception if the writer cannot be opened
     */
    private IndexWriter getWriter() throws Exception {
        IndexWriterConfig config =
                new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35));
        return new IndexWriter(directory, config);
    }

    /**
     * Counts the documents whose given field contains exactly the given term.
     *
     * @param filedName    name of the field to search
     * @param searchString term text to look up (used as-is in a {@link TermQuery})
     * @return total number of matching documents
     * @throws Exception if the index cannot be opened or searched
     */
    protected int getHitCount(String filedName, String searchString) throws Exception {
        IndexReader reader = IndexReader.open(directory);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                Query query = new TermQuery(new Term(filedName, searchString));
                return hitCount(searcher, query);
            } finally {
                searcher.close();
            }
        } finally {
            // Closed even when the search throws, so the reader never leaks.
            reader.close();
        }
    }

    /**
     * Runs the query and returns the total hit count reported by the searcher.
     *
     * @param searcher searcher to run the query on
     * @param query    query to execute
     * @return {@code totalHits} of the search result
     * @throws IOException if the search fails
     */
    public static int hitCount(IndexSearcher searcher, Query query) throws IOException {
        // Only totalHits is read, so a single top document is requested.
        TopDocs topDocs = searcher.search(query, 1);
        return topDocs.totalHits;
    }

    /** Verifies the number of documents seen by a writer opened on the fixture index. */
    @Test
    public void testIndexWriter() throws Exception {
        IndexWriter writer = getWriter();
        try {
            assertEquals(ids.length, writer.numDocs()); // number of documents written
        } finally {
            writer.close();
        }
    }

    /** Verifies the document counts seen by a reader opened on the fixture index. */
    @Test
    public void testIndexReader() throws Exception {
        IndexReader reader = IndexReader.open(directory);
        try {
            assertEquals(ids.length, reader.maxDoc());  // highest document count, deleted ones included
            assertEquals(ids.length, reader.numDocs()); // live documents
        } finally {
            reader.close();
        }
    }

    /** Verifies that a term shared by every fixture document matches all of them. */
    @Test
    public void testIndexReader2() throws Exception {
        int hitCount = getHitCount("contents", "lots");
        assertEquals(ids.length, hitCount);
        System.out.println(hitCount);
    }

    /**
     * Deletes a document without merging segments afterwards: the test asserts that
     * the deleted document is still counted by {@code maxDoc()} and only flagged as
     * deleted, while {@code numDocs()} no longer counts it.
     *
     * @throws Exception if the index cannot be modified
     */
    @Test
    public void testDeleteBeforeOptimize() throws Exception {
        IndexWriter writer = getWriter();
        try {
            assertEquals(ids.length, writer.numDocs());     // all fixture documents present
            writer.deleteDocuments(new Term("id", "1"));    // delete the document with id=1
            writer.commit();
            assertTrue(writer.hasDeletions());              // a document is flagged as deleted
            assertEquals(ids.length, writer.maxDoc());      // flagged document is still counted
            assertEquals(ids.length - 1, writer.numDocs()); // one live document fewer
        } finally {
            writer.close();
        }
    }

    /**
     * Deletes a document and then merges the index down to one segment: the test
     * asserts that the deleted document is gone from both {@code maxDoc()} and
     * {@code numDocs()} and that no deletions remain flagged.
     *
     * @throws Exception if the index cannot be modified
     */
    @Test
    public void testDeleteAfterOptimize() throws Exception {
        IndexWriter writer = getWriter();
        try {
            assertEquals(ids.length, writer.numDocs());     // all fixture documents present
            writer.deleteDocuments(new Term("id", "1"));    // delete the document with id=1
            // forceMerge(1) is the non-deprecated replacement for optimize() as of Lucene 3.5.
            writer.forceMerge(1);
            writer.commit();
            assertFalse(writer.hasDeletions());             // nothing is flagged as deleted any more
            assertEquals(ids.length - 1, writer.maxDoc());  // total documents
            assertEquals(ids.length - 1, writer.numDocs()); // live documents
        } finally {
            writer.close();
        }
    }

    /**
     * Replaces the document with id=1 by a new one via
     * {@code updateDocument(term, doc)} and checks that the old city value is no
     * longer searchable while the new one is.
     *
     * @throws Exception if the index cannot be modified or searched
     */
    @Test
    public void testUpdate() throws Exception {
        assertEquals(1, getHitCount("city", "Amsterdam")); // old value is present before the update
        IndexWriter writer = getWriter();
        try {
            Document doc = new Document();
            doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("country", "Netherlands", Field.Store.YES, Field.Index.NO));
            doc.add(new Field("contents", "Den Haag has a lot of musenums", Field.Store.NO, Field.Index.ANALYZED));
            doc.add(new Field("city", "Haag", Field.Store.YES, Field.Index.ANALYZED));
            writer.updateDocument(new Term("id", "1"), doc); // swap the id=1 document for the new one
        } finally {
            // The writer must be closed before searching so the change is visible to new readers.
            writer.close();
        }
        assertEquals(0, getHitCount("city", "Amsterdam")); // old city value is gone
        assertEquals(1, getHitCount("city", "Haag"));      // new document with city=Haag was added
    }
}
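// Chapter 2: Building a Lucene index (add, search, update, delete)
// Page residue from the original article: "latest recommended article published 2023-05-29 02:03:01"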