package org.itat.index;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Small demonstration helper around a Lucene 3.5 index holding six sample
 * "e-mail" documents. It shows how to create, delete, restore, update,
 * merge, inspect and search an index stored in a file-system directory.
 *
 * <p>Every public operation reports failures with
 * {@link Exception#printStackTrace()} instead of propagating them, and
 * every writer, reader and searcher opened by an operation is closed in a
 * {@code finally} block so that a failed operation does not leave the
 * resource open behind it.
 *
 * @author Banxian'er
 */
public class IndexUtil {
    /** Index location used by the no-argument constructor. */
    private static final String DEFAULT_INDEX_PATH = "d:/lucene/index02";

    private String[] ids = { "1", "2", "3", "4", "5", "6" };
    private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org",
            "dd@sina.org", "ee@zttc.edu", "ff@itat.org" };
    private String[] contents = { "welcome to visited the space,I like book",
            "hello boy,I like pingpeng ball", "my name is cc I like game",
            "I like football", "I like football and I like basketball too",
            "I like movie and swim" };
    // Mail dates, filled in by setDates().
    private Date[] dates = null;
    private int[] attachs = { 2, 3, 1, 4, 5, 5 };
    private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike",
            "jake" };
    private Directory directory = null;
    // Index-time boost per e-mail domain.
    private Map<String, Float> scores = new HashMap<String, Float>();

    /**
     * Creates a helper that works on the default index location
     * ({@code d:/lucene/index02}).
     */
    public IndexUtil() {
        this(DEFAULT_INDEX_PATH);
    }

    /**
     * Creates a helper that works on the index stored at {@code indexPath}.
     *
     * <p>If the directory cannot be opened the failure is printed and the
     * instance is left without a directory; subsequent operations will then
     * print their own failure instead of doing any work.
     *
     * @param indexPath file-system path of the index directory
     */
    public IndexUtil(String indexPath) {
        try {
            setDates();
            scores.put("itat.org", 2.0f);
            scores.put("zttc.edu", 1.5f);
            directory = FSDirectory.open(new File(indexPath));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Marks every document whose {@code id} term equals {@code "1"} as
     * deleted.
     */
    public void delete() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            // To wipe the whole index use writer.deleteAll() instead.
            writer.deleteDocuments(new Term("id", "1"));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeResource(writer);
        }
    }

    /**
     * Physically removes documents that were previously marked as deleted by
     * asking the writer to merge away segments containing deletions.
     */
    public void forceDelete() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            writer.forceMergeDeletes();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeResource(writer);
        }
    }

    /**
     * Restores all documents that are marked as deleted but have not been
     * merged away yet.
     */
    public void undelete() {
        IndexReader reader = null;
        try {
            // Undeleting goes through a reader opened with readOnly = false.
            reader = IndexReader.open(directory, false);
            reader.undeleteAll();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeResource(reader);
        }
    }

    /**
     * Replaces the document(s) whose {@code id} term equals {@code "1"} with
     * a new document carrying id {@code "11"} and the first sample e-mail,
     * content and name. The replacement has no {@code attach} or
     * {@code date} field.
     */
    public void update() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            // A Lucene update is a delete followed by an add.
            Document doc = new Document();
            doc.add(new Field("id", "11", Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));
            doc.add(new Field("email", emails[0], Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            doc.add(new Field("content", contents[0], Field.Store.NO,
                    Field.Index.ANALYZED));
            doc.add(new Field("name", names[0], Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS));
            writer.updateDocument(new Term("id", "1"), doc);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeResource(writer);
        }
    }

    /**
     * Prints the live document count, the maximum document number and the
     * number of deleted documents of the index.
     */
    public void query() {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(directory);
            // The reader exposes the document statistics directly.
            System.out.println("numDocs:" + reader.numDocs());
            System.out.println("maxDocs:" + reader.maxDoc());
            System.out.println("deleteDocs:" + reader.numDeletedDocs());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeResource(reader);
        }
    }

    /**
     * Manually merges the index down to at most two segments.
     */
    public void merge() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            writer.forceMerge(2);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeResource(writer);
        }
    }

    /**
     * Rebuilds the index from scratch: removes every existing document and
     * adds one document per sample record. Each document is boosted
     * according to its e-mail domain ({@code 0.5} when the domain has no
     * configured boost).
     */
    public void index() {
        IndexWriter writer = null;
        try {
            writer = openWriter();
            // Start from an empty index.
            writer.deleteAll();
            for (int i = 0; i < ids.length; i++) {
                Document doc = new Document();
                doc.add(new Field("id", ids[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new Field("email", emails[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED));
                doc.add(new Field("content", contents[i], Field.Store.NO,
                        Field.Index.ANALYZED));
                doc.add(new Field("name", names[i], Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                doc.add(new NumericField("attach", Field.Store.YES, true)
                        .setIntValue(attachs[i]));
                doc.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(dates[i].getTime()));
                // The part after the last '@' selects the boost.
                String et = emails[i].substring(emails[i].lastIndexOf("@") + 1);
                System.out.println(et);
                if (scores.containsKey(et)) {
                    doc.setBoost(scores.get(et));
                } else {
                    doc.setBoost(0.5f);
                }
                writer.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeResource(writer);
        }
    }

    /**
     * Searches the {@code content} field for the term {@code like} and
     * prints up to ten hits with their stored fields.
     *
     * <p>Documents written by {@link #update()} carry no {@code date}
     * field; for those the date is printed as {@code null} instead of
     * aborting the listing.
     */
    public void search() {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        try {
            reader = IndexReader.open(directory);
            searcher = new IndexSearcher(reader);
            TermQuery query = new TermQuery(new Term("content", "like"));
            TopDocs tds = searcher.search(query, 10);
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                String storedDate = doc.get("date");
                String dateText = null;
                if (storedDate != null) {
                    dateText = formateNumToDateString(Long
                            .parseLong(storedDate));
                }
                // NOTE: per the Lucene documentation the boost is not
                // retrievable from a stored document, so getBoost() does not
                // report the index-time value here; the boost shows up in
                // the score instead.
                System.out.println("(文档号:"
                        + sd.doc
                        + "权值:"
                        + doc.getBoost()
                        + "评分:"
                        + sd.score
                        + ")姓名:"
                        + doc.get("name")
                        + "["
                        + "邮箱:"
                        + doc.get("email")
                        + "]编号:"
                        + doc.get("id")
                        + "附件数:"
                        + doc.get("attach")
                        + ",时间:"
                        + dateText);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Closing the searcher does not close a reader handed to it.
            closeResource(searcher);
            closeResource(reader);
        }
    }

    /**
     * (Re)initialises the sample mail dates, one per sample id. A parse
     * failure is printed and leaves the remaining entries {@code null}.
     */
    public void setDates() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        String[] rawDates = { "2010-02-19", "2012-01-11", "2010-09-19",
                "2010-12-22", "2012-01-01", "2011-05-19" };
        dates = new Date[ids.length];
        try {
            for (int i = 0; i < rawDates.length; i++) {
                dates[i] = sdf.parse(rawDates[i]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Formats a timestamp as a {@code yyyy-MM-dd} date string in the default
     * time zone.
     *
     * @param num milliseconds since the epoch; must not be {@code null}
     * @return the formatted date
     */
    public String formateNumToDateString(Long num) {
        java.text.SimpleDateFormat format = new java.text.SimpleDateFormat(
                "yyyy-MM-dd");
        return format.format(new Date(num));
    }

    /**
     * Opens a writer on the index directory using the Lucene 3.5 standard
     * analyzer. The caller is responsible for closing it.
     */
    private IndexWriter openWriter() throws java.io.IOException {
        return new IndexWriter(directory, new IndexWriterConfig(
                Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
    }

    /**
     * Closes {@code resource} if it is not {@code null}, printing (rather
     * than propagating) any failure so it can be used from {@code finally}.
     */
    private static void closeResource(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
package org.itat.test;
import org.itat.index.IndexUtil;
import org.junit.Test;
/**
 * Manual JUnit drivers for {@link IndexUtil}: each test builds a fresh
 * helper and triggers exactly one index operation. The tests make no
 * assertions; results are inspected on the console.
 */
public class TestIndex {

    /** Builds the helper instance used by a single test. */
    private static IndexUtil freshUtil() {
        return new IndexUtil();
    }

    /** Rebuilds the sample index. */
    @Test
    public void testIndex() {
        freshUtil().index();
    }

    /** Prints the document statistics of the index. */
    @Test
    public void testQuery() {
        freshUtil().query();
    }

    /** Runs the delete operation. */
    @Test
    public void testDelete() {
        freshUtil().delete();
    }

    /** Runs the undelete operation. */
    @Test
    public void testUnDelete() {
        freshUtil().undelete();
    }

    /** Runs the force-delete operation. */
    @Test
    public void testForceDelete() {
        freshUtil().forceDelete();
    }

    /** Runs the manual merge operation. */
    @Test
    public void testMerge() {
        freshUtil().merge();
    }

    /** Runs the update operation. */
    @Test
    public void testUpdate() {
        freshUtil().update();
    }

    /** Runs the sample search and prints the hits. */
    @Test
    public void testSearch() {
        freshUtil().search();
    }
}