TestIndex.java
package org.itat.lucene.test;
import org.itat.lucene.util.IndexUtil;
import org.junit.Test;
/**
 * JUnit tests that drive {@link IndexUtil}: building the index, printing the
 * document counters, and running a near-real-time (NRT) search loop.
 *
 * Project: lucene_index
 *
 * @author 半仙儿
 * @version V1.0
 * @date 2015-4-20 18:07:37
 */
public class TestIndex {

    /** Number of search/delete rounds performed by {@link #testSearch02()}. */
    private static final int SEARCH_ROUNDS = 5;

    /** Zero-based round in which the update is issued. */
    private static final int UPDATE_ROUND = 2;

    /** Pause between rounds, in milliseconds. */
    private static final long PAUSE_MILLIS = 2000L;

    /**
     * Builds the index by calling {@link IndexUtil#index()}.
     *
     * @author 半仙儿
     * @date 2015-4-27 13:56:51
     */
    @Test
    public void testIndex() {
        IndexUtil iu = new IndexUtil();
        iu.index();
    }

    /**
     * Prints how many documents the index holds (numDocs / maxDocs /
     * deleteDocs) by calling {@link IndexUtil#query()}.
     *
     * @author 半仙儿
     * @date 2015-4-27 13:56:44
     */
    @Test
    public void testQuery() {
        IndexUtil iu = new IndexUtil();
        iu.query();
    }

    /**
     * Exercises NRT search: in every round it searches, prints a separator,
     * issues the delete, and (in one round only) issues the update, then
     * pauses before the next round. The pending changes are committed once
     * after the loop.
     *
     * @author 半仙儿
     * @date 2015-4-27 14:13:11
     */
    @Test
    public void testSearch02() {
        IndexUtil iu = new IndexUtil();
        for (int i = 0; i < SEARCH_ROUNDS; i++) {
            // Run the query against the current NRT searcher.
            iu.searcher02();
            System.out.println("------------------------------------");
            // Delete the document whose id is 11.
            iu.delete();
            if (i == UPDATE_ROUND) {
                // Issue the update exactly once, in the middle of the run.
                iu.update();
            }
            try {
                Thread.sleep(PAUSE_MILLIS);
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of silently dropping it,
                // so the code running this test can still observe it.
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
        }
        // Persist everything that was changed during the loop.
        iu.commit();
    }
}
IndexUtil.java
package org.itat.lucene.util;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NRTManager;
import org.apache.lucene.search.NRTManagerReopenThread;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.SearcherWarmer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Demo helper around a single Lucene index: it owns one {@link IndexWriter},
 * an {@link NRTManager} layered on top of that writer, and a background
 * reopen thread, and exposes index / update / delete / search / commit
 * operations on a small hard-coded set of sample "e-mail" documents.
 *
 * All Lucene failures are caught and printed with
 * {@code printStackTrace()}; no method here propagates a checked exception.
 * If the constructor fails, the instance is left partially initialised and
 * later calls will print a {@code NullPointerException} stack trace.
 */
public class IndexUtil {

    /** Location of the on-disk index opened by the constructor. */
    private static final String INDEX_PATH = "d:/lucene/index02";

    /** Pattern used for both parsing the sample dates and formatting output. */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    // Parallel arrays describing the sample documents; index i of each array
    // belongs to the same document.
    private String[] ids = { "1", "2", "3", "4", "5", "6" };
    private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org",
            "dd@sina.org", "ee@zttc.edu", "ff@itat.org" };
    private String[] contents = { "welcome to visited the space,I like book",
            "hello boy,I like pingpeng ball", "my name is cc I like game",
            "I like football", "I like football and I like basketball too",
            "I like movie and swim" };
    // Mail dates, filled in by setDates().
    private Date[] dates = null;
    private int[] attachs = { 2, 3, 1, 4, 5, 5 };
    private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike",
            "jake" };

    private Directory directory = null;
    // NRT search plumbing.
    private SearcherManager mgr = null;
    private NRTManager nrtMgr = null;
    private IndexWriter writer = null;
    // Kept so that close() can stop the background thread.
    private NRTManagerReopenThread reopenThread = null;
    // Document boost per e-mail domain.
    private Map<String, Float> scores = new HashMap<String, Float>();

    /**
     * Opens the index directory, creates the single shared writer, wraps it
     * in an NRT manager and starts the daemon thread that reopens searchers.
     * Any failure is printed and leaves the remaining fields {@code null}.
     */
    public IndexUtil() {
        try {
            setDates();
            scores.put("itat.org", 2.0f);
            scores.put("zttc.edu", 1.5f);
            directory = FSDirectory.open(new File(INDEX_PATH));
            writer = new IndexWriter(directory, new IndexWriterConfig(
                    Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
            nrtMgr = new NRTManager(writer, new SearcherWarmer() {
                @Override
                public void warm(IndexSearcher arg0) throws IOException {
                    System.out.println("reopen");
                }
            });
            // Per the NRTManagerReopenThread constructor these are the target
            // maximum (5.0 s) and minimum (0.025 s) staleness of a searcher.
            reopenThread = new NRTManagerReopenThread(nrtMgr, 5.0, 0.025);
            // Daemon, so it never keeps the JVM alive on its own.
            reopenThread.setDaemon(true);
            reopenThread.setName("Nrt Manager Reopen Thread");
            reopenThread.start();
            // true: ask for the searcher manager that has deletes applied.
            mgr = nrtMgr.getSearcherManager(true);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Deletes every document whose {@code id} term is {@code "11"}, going
     * through the NRT manager. Failures are printed.
     *
     * @author 半仙儿
     * @date 2015-4-14 09:58:24
     */
    public void delete() {
        try {
            nrtMgr.deleteDocuments(new Term("id", "11"));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Replaces the document(s) matching {@code id = "1"} with a new document
     * whose {@code id} is {@code "11"} and whose other fields are copied from
     * the first sample entry. (A Lucene update is a delete followed by an
     * add.) Failures are printed.
     *
     * @author 半仙儿
     * @date 2015-4-14 09:58:42
     */
    public void update() {
        try {
            Document doc = new Document();
            doc.add(new Field("id", "11", Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("email", emails[0], Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            doc.add(new Field("content", contents[0], Field.Store.NO,
                    Field.Index.ANALYZED));
            doc.add(new Field("name", names[0], Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));
            nrtMgr.updateDocument(new Term("id", "1"), doc);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Opens a fresh reader on the directory and prints {@code numDocs},
     * {@code maxDocs} and the number of deleted documents. The reader is
     * always closed, even when one of the calls fails. Failures are printed.
     *
     * @author 半仙儿
     * @date 2015-4-14 09:58:52
     */
    public void query() {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(directory);
            System.out.println("numDocs:" + reader.numDocs());
            System.out.println("maxDocs:" + reader.maxDoc());
            System.out.println("deleteDocs:" + reader.numDeletedDocs());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Rebuilds the index from the sample data: removes every existing
     * document, adds one document per sample entry, and commits.
     *
     * This uses the writer created by the constructor. Opening a second
     * {@code IndexWriter} on the same directory while the first one is still
     * open cannot obtain the index write lock, so the shared writer must be
     * reused and committed rather than closed. Failures are printed.
     *
     * @author 半仙儿
     * @date 2015-4-14 09:59:01
     */
    public void index() {
        try {
            // Start from an empty index.
            writer.deleteAll();
            for (int i = 0; i < ids.length; i++) {
                writer.addDocument(buildDocument(i));
            }
            // Make the rebuilt index durable; the writer stays open for the
            // NRT manager.
            writer.commit();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the Lucene document for sample entry {@code i}, prints the
     * entry's e-mail domain, and sets the document boost from {@code scores}
     * (0.5 when the domain has no configured boost).
     */
    private Document buildDocument(int i) {
        Document doc = new Document();
        doc.add(new Field("id", ids[i], Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS));
        doc.add(new Field("email", emails[i], Field.Store.YES,
                Field.Index.NOT_ANALYZED));
        doc.add(new Field("content", contents[i], Field.Store.NO,
                Field.Index.ANALYZED));
        doc.add(new Field("name", names[i], Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS));
        doc.add(new NumericField("attach", Field.Store.YES, true)
                .setIntValue(attachs[i]));
        doc.add(new NumericField("date", Field.Store.YES, true)
                .setLongValue(dates[i].getTime()));
        // Everything after the last '@' is the mail domain.
        String domain = emails[i].substring(emails[i].lastIndexOf("@") + 1);
        System.out.println(domain);
        Float boost = scores.get(domain);
        doc.setBoost(boost != null ? boost.floatValue() : 0.5f);
        return doc;
    }

    /**
     * Fills {@code dates} with the six sample mail dates. If parsing fails,
     * the stack trace is printed and the remaining slots stay {@code null}.
     */
    public void setDates() {
        String[] raw = { "2010-02-19", "2012-01-11", "2010-09-19",
                "2010-12-22", "2012-01-01", "2011-05-19" };
        SimpleDateFormat sdf = new SimpleDateFormat(DATE_PATTERN);
        dates = new Date[ids.length];
        try {
            for (int i = 0; i < raw.length; i++) {
                dates[i] = sdf.parse(raw[i]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Formats a millisecond timestamp as a {@code yyyy-MM-dd} string in the
     * JVM's default time zone.
     *
     * @param num milliseconds since the epoch; must not be {@code null}
     * @return the formatted date
     * @author 半仙儿
     * @date 2015-4-14 16:03:18
     */
    public String formateNumToDateString(Long num) {
        return new SimpleDateFormat(DATE_PATTERN).format(new Date(num));
    }

    /**
     * NRT search: acquires a searcher from the searcher manager, runs a term
     * query for {@code content:like} (top 10 hits), prints the stored fields
     * of each hit, and always releases the searcher. Failures are printed.
     *
     * @author 半仙儿
     * @date 2015-4-27 15:04:17
     */
    public void searcher02() {
        // The searcher is borrowed from the manager and must be given back.
        IndexSearcher searcher = mgr.acquire();
        try {
            TermQuery query = new TermQuery(new Term("content", "like"));
            TopDocs tds = searcher.search(query, 10);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                System.out.println(doc.get("id") + "---->" + doc.get("name")
                        + "[" + doc.get("email") + "]-->" + doc.get("id") + ","
                        + doc.get("attach") + "," + doc.get("date"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                mgr.release(searcher);
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Commits the shared writer's pending changes. Failures are printed.
     *
     * @author 半仙儿
     * @date 2015-4-27 14:55:56
     */
    public void commit() {
        try {
            writer.commit();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Releases what the constructor acquired: stops the reopen thread, closes
     * the NRT manager, then closes the writer. Each step is attempted even if
     * an earlier one fails; failures are printed. The instance must not be
     * used after this call.
     */
    public void close() {
        if (reopenThread != null) {
            try {
                reopenThread.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
            reopenThread = null;
        }
        if (nrtMgr != null) {
            try {
                nrtMgr.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
            nrtMgr = null;
            mgr = null;
        }
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
            writer = null;
        }
    }
}