lucene 4.6 为数据库建立增量索引

最新推荐文章于 2021-02-02 07:08:05 发布

笨菜鸟007

最新推荐文章于 2021-02-02 07:08:05 发布

阅读量4.3k

点赞数 1

分类专栏： Java 文章标签： lucene4.6 数据库增量索引

本文链接：https://blog.csdn.net/th676759829/article/details/17753803

版权

Java 专栏收录该内容

36 篇文章 0 订阅

订阅专栏

lucene 4.6 为数据库建立增量索引

首先去官网下载lucene ，地址：http://www.apache.org/dyn/closer.cgi/lucene/java/4.6.0

下载IK分词源码,地址 : http://code.google.com/p/ik-analyzer/downloads/list

添加lucene jar包:

导入分词源码:

这里以一个商品表为例,对该表建立索引,并进行查询

商品表对应的bean:

/**
 * Bean for a row of the product ("goods") table that the article indexes.
 *
 * NOTE(review): this listing is an excerpt - the accessors and the closing
 * brace are omitted, and the import for Timestamp is not shown (presumably
 * java.sql.Timestamp - confirm against the full source).
 */
public class Goods implements java.io.Serializable {

	// Fields

	private Integer id;
	private String name;
	private String describe;
	private Timestamp uploadTime;
	private Double price;
	private Integer newOld;
	private String imageName;
	private Integer userId;
	private Integer specialGoodsId;
	private Boolean state;
	private Integer needSpecialGoodsId;
	private String needName;
        // ... getters and setters omitted

为方便以后为数据库其他表建索引,采用模板模式,建一个抽象类,把建索引的方法和
转化为bean的方法写成抽象方法,并使用泛型,方便子类继承,代码如下:

package com.sms.web.lucene;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Template base class for building and querying a Lucene index over a list of
 * beans of type {@code T}.
 * <p>
 * Subclasses supply the bean-to-document mapping ({@link #getDoc(List)}) and
 * the hit-to-bean mapping ({@link #toBean(IndexSearcher, Query, ScoreDoc[])});
 * this class owns opening and closing the index, query parsing and
 * highlighting.
 *
 * @param <T> bean type stored in the index
 */
public abstract class LuceneSearch<T> {
	/** Maximum number of hits fetched by {@link #search(String, String)}. */
	private static final int MAX_HITS = 100;

	/** Directory on disk that holds the index files. */
	public File indexDir;

	/** Analyzer shared by indexing, query parsing and highlighting. */
	protected static Analyzer analyzer = new IKAnalyzer();

	/**
	 * @param indexDir directory on disk that holds (or will hold) the index
	 */
	public LuceneSearch(File indexDir) {
		this.indexDir = indexDir;
	}

	/**
	 * Adds the given items to the index, creating the index if it does not
	 * exist yet and appending to it otherwise.
	 * <p>
	 * I/O failures are printed to stderr and not rethrown, as before.
	 *
	 * @param items beans to index; {@code null} is treated as "nothing to do"
	 */
	public void createIndex(List<T> items) {
		if (items == null) {
			return;
		}

		Directory directory = null;
		IndexWriter indexWriter = null;
		try {
			directory = FSDirectory.open(indexDir);
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
			// Create the index on first use, append on later runs.
			config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
			indexWriter = new IndexWriter(directory, config);

			List<Document> docs = getDoc(items);
			if (docs != null) {
				for (Document doc : docs) {
					indexWriter.addDocument(doc);
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// The writer must be closed before the directory it writes to.
			closeQuietly(indexWriter);
			closeQuietly(directory);
		}
	}

	/**
	 * Runs a query against one field of the index and converts the hits to beans.
	 *
	 * @param queryStr   query text in Lucene classic query-parser syntax
	 * @param queryField default field the query is evaluated against
	 * @return the matching beans (at most {@value #MAX_HITS}); an empty list,
	 *         never {@code null}, when nothing matches or the search fails
	 */
	public List<T> search(String queryStr, String queryField) {
		List<T> hitItem = null;
		Directory directory = null;
		IndexReader reader = null;
		try {
			// Keep a handle on the directory: closing the reader does not close it.
			directory = FSDirectory.open(indexDir);
			reader = DirectoryReader.open(directory);
			IndexSearcher indexSearcher = new IndexSearcher(reader);
			QueryParser parser = new QueryParser(Version.LUCENE_46, queryField, analyzer);
			Query query = parser.parse(queryStr);
			ScoreDoc[] hits = indexSearcher.search(query, MAX_HITS).scoreDocs;
			hitItem = toBean(indexSearcher, query, hits);
		} catch (Exception e) {
			// Deliberately broad: parse errors, I/O errors and failures inside
			// toBean all end up as "no results" for the caller.
			e.printStackTrace();
		} finally {
			closeQuietly(reader);
			closeQuietly(directory);
		}

		if (hitItem == null) {
			return new java.util.ArrayList<T>();
		}
		return hitItem;
	}

	/**
	 * Converts beans into Lucene documents ready to be indexed.
	 *
	 * @param items beans to convert
	 * @return the documents to add to the index
	 */
	public abstract List<Document> getDoc(List<T> items);

	/**
	 * Converts search hits back into beans.
	 *
	 * @param indexSearcher searcher the hits came from; used to load the stored documents
	 * @param query         the executed query, needed for highlighting
	 * @param hits          the scored hits
	 * @return one bean per usable hit
	 */
	public abstract List<T> toBean(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits);

	/**
	 * Wraps the query terms found in a stored field in
	 * {@code <font color="blue">...</font>} tags.
	 * <p>
	 * NOTE(review): the stored text is emitted without HTML escaping; if it can
	 * contain user-supplied markup, confirm that the view layer escapes it.
	 *
	 * @param query the executed query
	 * @param doc   the hit document
	 * @param field name of the stored field to highlight
	 * @return the best highlighted fragment; the raw field value when nothing
	 *         matches or highlighting fails; {@code null} when the document has
	 *         no value for {@code field}
	 */
	protected String toHighlighter(Query query, Document doc, String field) {
		String text = doc.get(field);
		if (text == null) {
			// A StringReader cannot be built from null; nothing to highlight.
			return null;
		}
		try {
			SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color=\"blue\">", "</font>");
			Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
			// Analyse under the real field name so a per-field analyzer would behave correctly.
			TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
			String fragment = highlighter.getBestFragment(tokenStream, text);
			return fragment == null ? text : fragment;
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InvalidTokenOffsetsException e) {
			e.printStackTrace();
		}
		// Highlighting is cosmetic: fall back to the plain stored value.
		return text;
	}

	/** Closes a resource if it was opened, printing (not propagating) any close failure. */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

构建一个子类,继承上面的抽象类,实现其抽象方法:

package com.sms.web.lucene;

import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;

import com.sms.web.model.Goods;

/**
 * {@link LuceneSearch} implementation for {@link Goods}.
 * <p>
 * Each product becomes exactly one document:
 * <ul>
 * <li>{@code name} and {@code needName} are indexed, tokenized and stored.</li>
 * <li>{@code mixName} is a multi-valued field holding both names, so a single
 * query can match either one.</li>
 * <li>All other properties are stored only.</li>
 * </ul>
 * Properties that are {@code null} on the bean are left out of the document.
 */
public class SearchGood extends LuceneSearch<Goods> {

	/**
	 * @param indexDir directory on disk that holds the goods index
	 */
	public SearchGood(File indexDir) {
		super(indexDir);
	}

	/**
	 * Converts products into Lucene documents, one document per product.
	 *
	 * @param goods products to convert; {@code null} and {@code null} elements are skipped
	 * @return the documents, in the order of the input
	 */
	@Override
	public List<Document> getDoc(List<Goods> goods) {
		List<Document> docs = new ArrayList<Document>();
		if (goods == null) {
			return docs;
		}

		// Field type for searchable text: indexed, tokenized and stored.
		FieldType ftIndex = new FieldType();
		ftIndex.setIndexed(true);
		ftIndex.setStored(true);
		ftIndex.setTokenized(true);

		for (Goods good : goods) {
			if (good == null) {
				continue;
			}
			Document doc = new Document();

			addText(doc, "name", good.getName(), ftIndex);
			addText(doc, "needName", good.getNeedName(), ftIndex);
			addStored(doc, "id", good.getId());
			addStored(doc, "describe", good.getDescribe());
			if (good.getUploadTime() != null) {
				addStored(doc, "uploadTime", good.getUploadTime().toString());
			}
			addStored(doc, "price", good.getPrice());
			addStored(doc, "newOld", good.getNewOld());
			addStored(doc, "imageName", good.getImageName());
			addStored(doc, "userId", good.getUserId());
			addStored(doc, "specialGoodsId", good.getSpecialGoodsId());
			if (good.getState() != null) {
				addStored(doc, "state", good.getState().toString());
			}
			addStored(doc, "needSpecialGoodsId", good.getNeedSpecialGoodsId());

			// Combined name field: both values live on the SAME document, so a
			// hit on "mixName" always carries the id and the other stored
			// fields needed to rebuild the bean.
			addText(doc, "mixName", good.getName(), ftIndex);
			addText(doc, "mixName", good.getNeedName(), ftIndex);

			docs.add(doc);
		}
		return docs;
	}

	/**
	 * Rebuilds products from search hits, highlighting {@code name} and {@code needName}.
	 * <p>
	 * Hits without a stored {@code id} cannot be mapped back to a product and
	 * are skipped. Stored values that are absent leave the corresponding bean
	 * property unset. If loading a document fails with an I/O error, the error
	 * is printed and the products converted so far are returned.
	 *
	 * @param indexSearcher searcher the hits came from
	 * @param query         the executed query, used for highlighting
	 * @param hits          the scored hits; {@code null} yields an empty list
	 * @return the rebuilt products, in hit order
	 */
	@Override
	public List<Goods> toBean(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits) {
		List<Goods> hitGoods = new ArrayList<Goods>();
		if (hits == null) {
			return hitGoods;
		}
		try {
			for (int i = 0; i < hits.length; i++) {
				Document hitDoc = indexSearcher.doc(hits[i].doc);
				String id = hitDoc.get("id");
				if (id == null) {
					// e.g. a mixName-only document left by an older index layout
					continue;
				}

				Goods good = new Goods();
				good.setId(Integer.parseInt(id));
				// Highlight the query terms in both name fields.
				good.setName(toHighlighter(query, hitDoc, "name"));
				good.setNeedName(toHighlighter(query, hitDoc, "needName"));
				good.setDescribe(hitDoc.get("describe"));
				good.setImageName(hitDoc.get("imageName"));

				String uploadTime = hitDoc.get("uploadTime");
				if (uploadTime != null) {
					good.setUploadTime(Timestamp.valueOf(uploadTime));
				}
				String price = hitDoc.get("price");
				if (price != null) {
					good.setPrice(Double.parseDouble(price));
				}
				String newOld = hitDoc.get("newOld");
				if (newOld != null) {
					good.setNewOld(Integer.parseInt(newOld));
				}
				String userId = hitDoc.get("userId");
				if (userId != null) {
					good.setUserId(Integer.parseInt(userId));
				}
				String specialGoodsId = hitDoc.get("specialGoodsId");
				if (specialGoodsId != null) {
					good.setSpecialGoodsId(Integer.parseInt(specialGoodsId));
				}
				String state = hitDoc.get("state");
				if (state != null) {
					good.setState(Boolean.parseBoolean(state));
				}
				String needSpecialGoodsId = hitDoc.get("needSpecialGoodsId");
				if (needSpecialGoodsId != null) {
					good.setNeedSpecialGoodsId(Integer.parseInt(needSpecialGoodsId));
				}

				hitGoods.add(good);
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
		return hitGoods;
	}

	/** Adds an indexed, tokenized, stored text field unless the value is null. */
	private static void addText(Document doc, String name, String value, FieldType type) {
		if (value != null) {
			doc.add(new Field(name, value, type));
		}
	}

	/** Adds a stored-only string field unless the value is null. */
	private static void addStored(Document doc, String name, String value) {
		if (value != null) {
			doc.add(new StoredField(name, value));
		}
	}

	/** Adds a stored-only int field unless the value is null. */
	private static void addStored(Document doc, String name, Integer value) {
		if (value != null) {
			doc.add(new StoredField(name, value.intValue()));
		}
	}

	/** Adds a stored-only double field unless the value is null. */
	private static void addStored(Document doc, String name, Double value) {
		if (value != null) {
			doc.add(new StoredField(name, value.doubleValue()));
		}
	}

}

对数据库建立索引:
从数据库中查找出所有的商品记录,调用上面的createIndex方法进行建索引,
建完索引后如何维护,也就是说以后数据库中记录发生变化后,索引如何进行更新?
对于数据库记录只有增加的情况来说,我们可以在表中加1个flag字段,标志是否已
为其建立了索引,建立索引时把其置为1,再把建索引的方法写成spring的定时任务,
下次建索引时,只为flag为0的记录建增量索引。
但是对于数据库的更新，删除记录，如何使索引与其一致，我现在还没想到合适的
方法（菜鸟一个，功力不够啊！），
希望广大网友能够提点建议。。。这也是我写这篇博客最主要的目的。