Lucene 初试

项目数据量较大,如果从数据库查询,效率较低,所以用到了lucene。


针对项目的需求,写了一个工具类,后续还需要进一步修改和完善。


日期排序:建立索引时将日期转换成 long 类型的毫秒值保存,检索时按该字段排序。


Lucene 版本:3.6.2

分词器:IKAnalyzer2012_u6

package t.util;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import t.Constants;
import t.model.News;

/**
 * Lucene 索引工具类
 * 
 * @auhtor: tangjing
 * @date:2013-2-4
 */
public class LuceneUtil {

	/**
	 * lucene 索引文件夹地址
	 */
	public static final String LUCENE_INDEX_DIR = "c://luceneTest";

	/**
	 * 新闻ID 索引域名
	 */
	public static final String FIELDNAME_NEWS_ID = "id";
	/**
	 * 新闻内容 索引域名
	 */
	public static final String FIELDNAME_NEWS_CONTENT = "content";
	/**
	 * 新闻发布时间 索引域名
	 */
	public static final String FIELDNAME_NEWS_DATE = "date";
	/**
	 * 新闻来源 索引域名
	 */
	public static final String FIELDNAME_NEWS_SOURCE = "source";

	/**
	 * 创建索引 单个对象
	 * 
	 * @param news
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	public static void createIndexByNews(News news) {
		try {
			if (news != null) {
				IndexWriter indexWriter = getIndexWriter();
				indexWriter.addDocument(getDocumentByNews(news));
				indexWriter.close();
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * 创建索引 news的集合
	 * 
	 * @param news
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	public static void createIndexByNewsList(List<News> newsList) {
		try {
			if (newsList != null) {
				IndexWriter indexWriter = getIndexWriter();
				for (Iterator<News> iterator = newsList.iterator(); iterator
						.hasNext();) {
					News news = (News) iterator.next();
					indexWriter.addDocument(getDocumentByNews(news));
				}
				indexWriter.close();
			}
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * 默认排序
	 * 
	 * @param keywords
	 * @param size
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	public static List<News> searchNewsIndex(String keywords, int size) {
		return searchNewsIndex(keywords, size, new Sort());
	}

	/**
	 * 根据时间排序
	 * 
	 * @param keywords
	 * @param size
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	public static List<News> searchNewsIndexOrderByDate(String keywords,
			int size) {
		Sort sort = new Sort(new SortField(FIELDNAME_NEWS_DATE, SortField.LONG,
				true));
		return searchNewsIndex(keywords, size, sort);
	}

	/**
	 * 
	 * @param keywords
	 *            关键词
	 * @param size
	 *            查询的条数
	 * @param sore
	 *            查询的排序方式 如果为空,默认以相关性排序
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-1
	 */
	private static List<News> searchNewsIndex(String keywords, int size,
			Sort sort) {
		// 搜索
		List<News> list = null;
		try {
			Directory directory = FSDirectory.open(getIndexFile());
			IndexReader indexReader = IndexReader.open(directory);
			IndexSearcher searcher = new IndexSearcher(indexReader);
			Analyzer analyzer = new IKAnalyzer();
			QueryParser parser = new QueryParser(Version.LUCENE_36,
					FIELDNAME_NEWS_CONTENT, analyzer);
			// 设置词条之间的关系是AND 这里如果不设置,就是默认是OR
			// parser.setDefaultOperator(QueryParser.AND_OPERATOR);
			Query query = parser.parse(keywords);
			TopDocs topDocs = searcher.search(query, size, sort);
			list = new ArrayList<News>();
			ScoreDoc[] docs = topDocs.scoreDocs;
			for (ScoreDoc doc : docs) {
				Document d = searcher.doc(doc.doc);
				list.add(getNewsByDocument(d));
			}
		} catch (NumberFormatException e) {
			e.printStackTrace();
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
		return list;
	}

	/**
	 * 根据新闻对象,返回lucene文档对象
	 * 
	 * @param news
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-4
	 */
	private static Document getDocumentByNews(News news) {
		Document document = new Document();
		// ID不用建立索引
		document.add(new Field(FIELDNAME_NEWS_ID, news.getId() + "",
				Field.Store.YES, Field.Index.NO));
		document.add(new Field(FIELDNAME_NEWS_CONTENT, news.getContent(),
				Field.Store.YES, Field.Index.ANALYZED, TermVector.YES));
		document.add(new Field(FIELDNAME_NEWS_DATE, news.getCreateDate()
				.getTime() + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
		// 网站可以建立索引,不用分词
		document.add(new Field(FIELDNAME_NEWS_SOURCE, news.getNetsite(),
				Field.Store.YES, Field.Index.NOT_ANALYZED));
		return document;
	}

	/**
	 * 根据索引文档,转换为news对象
	 * 
	 * @param document
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-4
	 */
	private static News getNewsByDocument(Document document) {
		News news = new News();
		news.setId(Integer.parseInt(document.get(FIELDNAME_NEWS_ID)));
		news.setContent(document.get(FIELDNAME_NEWS_CONTENT));
		news.setNetsite(document.get(FIELDNAME_NEWS_SOURCE));
		Date date = new Date(Long.parseLong(document.get(FIELDNAME_NEWS_DATE)));
		news.setCreateDate(date);
		return news;
	}

	/**
	 * 获得IndexWriter对象
	 * 
	 * @return
	 * @auhtor: tangjing
	 * @date:2013-2-4
	 */
	private static IndexWriter getIndexWriter() {
		IndexWriter indexWriter = null;
		try {
			// IK分词器
			Analyzer analyzer = new IKAnalyzer();
			Directory directory = FSDirectory.open(getIndexFile());
			IndexWriterConfig writerConfig = new IndexWriterConfig(
					Version.LUCENE_36, analyzer);
			indexWriter = new IndexWriter(directory, writerConfig);
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return indexWriter;
	}

	/**
	 * 索引路径
	 * 
	 * @return
	 * @throws IOException
	 * @auhtor: tangjing
	 * @date:2013-2-4
	 */
	private static File getIndexFile() throws IOException {
		File indexFile = new File(Constants.LUCENE_INDEX_DIR);
		if (!indexFile.exists()) {
			indexFile.createNewFile();
		}
		return indexFile;
	}

}




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值