lucene 4.6 为数据库建立增量索引
首先去官网下载lucene ,
地址:http://www.apache.org/dyn/closer.cgi/lucene/java/4.6.0
下载IK分词源码,地址 : http://code.google.com/p/ik-analyzer/downloads/list
添加lucene jar包:
导入分词源码:
这里以一个商品表为例,对该表建立索引,并进行查询
商品表对应的bean:
为方便以后为数据库其他表建索引,采用模板模式,建一个抽象类,把建索引的方法和
转化为bean的方法写成抽象方法,并使用泛型,方便子类继承,代码如下:
构建一个子类,继承上面的抽象类,实现其抽象方法:
对数据库建立索引:
从数据库中查找出所有的商品记录,调用上面的createIndex方法进行建索引,
建完索引后如何维护,也就是说以后数据库中记录发生变化后,索引如何进行更新?
对于数据库记录只有增加的情况来说,我们可以在表中加1个flag字段,标志是否已
为其建立了索引,建立索引时把其置为1,再把建索引的方法写成Spring的定时任务,
下次建索引时,只为flag为0的建增量索引。
但是对于数据库的更新,删除记录,如何使索引与其一致,我现在还没想到合适的
方法(菜鸟一个,功力不够啊!),
希望广大网友能够提点建议。。。这也是我写这篇博客最主要的目的。
下载IK分词源码,地址 : http://code.google.com/p/ik-analyzer/downloads/list
添加lucene jar包:
导入分词源码:
这里以一个商品表为例,对该表建立索引,并进行查询
商品表对应的bean:
/**
 * Bean for the goods table; instances of this class are what gets indexed and
 * what search results are converted back into.
 *
 * NOTE(review): this listing is abbreviated in the article - the accessors and
 * the closing brace of the class are not shown.
 */
public class Goods implements java.io.Serializable {
// Fields
private Integer id;
private String name;
private String describe;
private Timestamp uploadTime;
private Double price;
private Integer newOld;
private String imageName;
private Integer userId;
private Integer specialGoodsId;
private Boolean state;
private Integer needSpecialGoodsId;
private String needName;
// ... getters and setters omitted
为方便以后为数据库其他表建索引,采用模板模式,建一个抽象类,把建索引的方法和
转化为bean的方法写成抽象方法,并使用泛型,方便子类继承,代码如下:
package com.sms.web.lucene;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Template for indexing and searching one kind of bean with Lucene.
 *
 * <p>The index life cycle (opening the directory, writing, searching, closing)
 * is implemented here; subclasses only supply the bean-to-document and
 * document-to-bean conversions via {@link #getDoc(List)} and
 * {@link #toBean(IndexSearcher, Query, ScoreDoc[])}.
 *
 * @param <T> bean type stored in the index
 */
public abstract class LuceneSearch<T> {

    /** Maximum number of hits fetched by {@link #search(String, String)}. */
    private static final int MAX_HITS = 100;

    /** Directory on disk that holds the index files. */
    public File indexDir;

    /** Analyzer used for indexing, query parsing and highlighting. */
    protected static Analyzer analyzer = new IKAnalyzer();

    /**
     * @param indexDir directory on disk that holds (or will hold) the index files
     */
    public LuceneSearch(File indexDir) {
        this.indexDir = indexDir;
    }

    /**
     * Adds the given items to the index, creating the index if it does not
     * exist yet and appending to it otherwise.
     *
     * <p>I/O failures are printed to standard error and not rethrown.
     *
     * @param items beans to convert with {@link #getDoc(List)} and add to the index
     */
    public void createIndex(List<T> items) {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = FSDirectory.open(indexDir);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
            // Create the index on first use, append on every later run.
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            indexWriter = new IndexWriter(directory, config);
            for (Document doc : getDoc(items)) {
                indexWriter.addDocument(doc);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // The writer must be closed before the directory it writes to.
            closeQuietly(indexWriter);
            closeQuietly(directory);
        }
    }

    /**
     * Parses {@code queryStr} against {@code queryField} and returns the best
     * matches (at most {@value #MAX_HITS}) converted to beans.
     *
     * @param queryStr   query text in Lucene classic query-parser syntax
     * @param queryField default field the query is run against
     * @return the matching beans, or {@code null} if the search failed
     *         (the failure is printed to standard error)
     */
    public List<T> search(String queryStr, String queryField) {
        List<T> hitItem = null;
        Directory directory = null;
        IndexReader reader = null;
        try {
            // Keep a handle on the directory so it can be closed as well;
            // closing the reader alone does not close it.
            directory = FSDirectory.open(indexDir);
            reader = DirectoryReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser(Version.LUCENE_46, queryField, analyzer);
            Query query = parser.parse(queryStr);
            ScoreDoc[] hits = indexSearcher.search(query, MAX_HITS).scoreDocs;
            hitItem = toBean(indexSearcher, query, hits);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(reader);
            closeQuietly(directory);
        }
        return hitItem;
    }

    /**
     * Converts beans into the Lucene documents that should be indexed.
     *
     * @param items beans to convert
     * @return the documents to add to the index
     */
    public abstract List<Document> getDoc(List<T> items);

    /**
     * Converts search hits back into beans.
     *
     * @param indexSearcher searcher the hits came from; used to load the stored documents
     * @param query         the executed query, e.g. for highlighting
     * @param hits          the hits to convert
     * @return one bean per converted hit
     */
    public abstract List<T> toBean(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits);

    /**
     * Returns the stored value of {@code field} with the terms matched by
     * {@code query} wrapped in a blue {@code <font>} tag.
     *
     * @param query query whose terms should be highlighted
     * @param doc   document holding the stored field
     * @param field name of the stored field to highlight
     * @return the best highlighted fragment; the unmodified stored value if
     *         nothing in it matches; or {@code null} if the document has no
     *         such stored field or highlighting failed
     */
    protected String toHighlighter(Query query, Document doc, String field) {
        String text = doc.get(field);
        if (text == null) {
            // Nothing stored under this name; StringReader would throw on null.
            return null;
        }
        try {
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color=\"blue\">", "</font>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
            String fragment = highlighter.getBestFragment(tokenStream, text);
            return fragment == null ? text : fragment;
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InvalidTokenOffsetsException e) {
            e.printStackTrace();
        }
        return null;
    }

    /** Closes {@code resource} if it is non-null, printing (not rethrowing) any I/O failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
构建一个子类,继承上面的抽象类,实现其抽象方法:
package com.sms.web.lucene;
import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import com.sms.web.model.Goods;
/**
 * {@link LuceneSearch} implementation for {@link Goods}.
 *
 * <p>Each goods record becomes exactly one document. {@code name} and
 * {@code needName} are indexed, tokenized and stored; both values are also
 * indexed under the combined field {@code mixName} so that one query can match
 * either of them. All remaining properties are stored only.
 */
public class SearchGood extends LuceneSearch<Goods> {

    /**
     * @param indexDir directory on disk that holds (or will hold) the index files
     */
    public SearchGood(File indexDir) {
        super(indexDir);
    }

    /**
     * Builds one document per goods record. Properties that are {@code null}
     * are left out of the document instead of aborting the whole batch.
     *
     * @param goods records to convert; {@code null} is treated as empty
     * @return one document per record, in input order
     */
    @Override
    public List<Document> getDoc(List<Goods> goods) {
        List<Document> docs = new ArrayList<Document>();
        if (goods == null) {
            return docs;
        }

        // Indexed + tokenized + stored: used for every searchable text field.
        FieldType ftIndex = new FieldType();
        ftIndex.setIndexed(true);
        ftIndex.setStored(true);
        ftIndex.setTokenized(true);

        for (Goods good : goods) {
            Document doc = new Document();
            addText(doc, "name", good.getName(), ftIndex);
            addText(doc, "needName", good.getNeedName(), ftIndex);
            addStored(doc, "id", good.getId());
            addStored(doc, "describe", good.getDescribe());
            addStored(doc, "uploadTime", asString(good.getUploadTime()));
            addStored(doc, "price", good.getPrice());
            addStored(doc, "newOld", good.getNewOld());
            addStored(doc, "imageName", good.getImageName());
            addStored(doc, "userId", good.getUserId());
            addStored(doc, "specialGoodsId", good.getSpecialGoodsId());
            addStored(doc, "state", asString(good.getState()));
            addStored(doc, "needSpecialGoodsId", good.getNeedSpecialGoodsId());
            // Combined name field: both values go into the SAME document as a
            // multi-valued field. Putting the second value into a document of
            // its own would produce hits that carry no id or other stored
            // fields, which toBean cannot turn back into a Goods.
            addText(doc, "mixName", good.getName(), ftIndex);
            addText(doc, "mixName", good.getNeedName(), ftIndex);
            docs.add(doc);
        }
        return docs;
    }

    /**
     * Rebuilds goods records from search hits, with the query terms highlighted
     * in {@code name} and {@code needName}.
     *
     * <p>Hits without a stored {@code id} are skipped: an index written by the
     * earlier version of {@link #getDoc(List)} contains documents that hold
     * nothing but a {@code mixName} value. Any other stored field that is
     * missing leaves the corresponding property {@code null}.
     *
     * @param indexSearcher searcher the hits came from
     * @param query         the executed query, used for highlighting
     * @param hits          the hits to convert
     * @return the converted records; on an I/O failure, the records converted so far
     */
    @Override
    public List<Goods> toBean(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits) {
        List<Goods> hitGoods = new ArrayList<Goods>();
        try {
            for (ScoreDoc hit : hits) {
                Document hitDoc = indexSearcher.doc(hit.doc);
                String id = hitDoc.get("id");
                if (id == null) {
                    continue;
                }
                String uploadTime = hitDoc.get("uploadTime");
                String state = hitDoc.get("state");

                Goods good = new Goods();
                good.setId(Integer.valueOf(id));
                good.setName(highlightIfPresent(query, hitDoc, "name"));
                good.setDescribe(hitDoc.get("describe"));
                good.setUploadTime(uploadTime == null ? null : Timestamp.valueOf(uploadTime));
                good.setPrice(toDouble(hitDoc.get("price")));
                good.setNewOld(toInteger(hitDoc.get("newOld")));
                good.setImageName(hitDoc.get("imageName"));
                good.setUserId(toInteger(hitDoc.get("userId")));
                good.setSpecialGoodsId(toInteger(hitDoc.get("specialGoodsId")));
                good.setState(state == null ? null : Boolean.valueOf(state));
                good.setNeedSpecialGoodsId(toInteger(hitDoc.get("needSpecialGoodsId")));
                good.setNeedName(highlightIfPresent(query, hitDoc, "needName"));
                hitGoods.add(good);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return hitGoods;
    }

    /** Highlights the stored value of {@code field}, or returns {@code null} if the hit has none. */
    private String highlightIfPresent(Query query, Document hitDoc, String field) {
        return hitDoc.get(field) == null ? null : toHighlighter(query, hitDoc, field);
    }

    /** Adds a field of the given type unless the value is {@code null}. */
    private static void addText(Document doc, String name, String value, FieldType type) {
        if (value != null) {
            doc.add(new Field(name, value, type));
        }
    }

    /** Adds a stored-only string field unless the value is {@code null}. */
    private static void addStored(Document doc, String name, String value) {
        if (value != null) {
            doc.add(new StoredField(name, value));
        }
    }

    /** Adds a stored-only int field unless the value is {@code null}. */
    private static void addStored(Document doc, String name, Integer value) {
        if (value != null) {
            doc.add(new StoredField(name, value.intValue()));
        }
    }

    /** Adds a stored-only double field unless the value is {@code null}. */
    private static void addStored(Document doc, String name, Double value) {
        if (value != null) {
            doc.add(new StoredField(name, value.doubleValue()));
        }
    }

    /** Null-safe {@code toString()}. */
    private static String asString(Object value) {
        return value == null ? null : value.toString();
    }

    /** Parses a stored int value; {@code null} stays {@code null}. */
    private static Integer toInteger(String value) {
        return value == null ? null : Integer.valueOf(value);
    }

    /** Parses a stored double value; {@code null} stays {@code null}. */
    private static Double toDouble(String value) {
        return value == null ? null : Double.valueOf(value);
    }
}
对数据库建立索引:
从数据库中查找出所有的商品记录,调用上面的createIndex方法进行建索引,
建完索引后如何维护,也就是说以后数据库中记录发生变化后,索引如何进行更新?
对于数据库记录只有增加的情况来说,我们可以在表中加1个flag字段,标志是否已
为其建立了索引,建立索引时把其置为1,再把建索引的方法写成Spring的定时任务,
下次建索引时,只为flag为0的建增量索引。
但是对于数据库的更新,删除记录,如何使索引与其一致,我现在还没想到合适的
方法(菜鸟一个,功力不够啊!),
希望广大网友能够提点建议。。。这也是我写这篇博客最主要的目的。