Lucene 扩展

目的:在 Lucene 的基础上扩展索引创建与索引查询功能(针对数据库中的数据)。
所需jar包:
lucene-core-2.4.1.jar
lucene-highlighter-2.4.1.jar
log4j-1.2.14.jar
commons-beanutils-1.5.jar
commons-collections-2.1.1.jar
commons-logging-1.0.4.jar

清单一:DocumentFactory.java
/*
* @(#)DocumentFactory.java 2009-10-09
*/
package com.ordinov.lucene;

import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

/**
 * Builds Lucene {@link Document}s from plain Java beans so they can be indexed.
 *
 * <p>Every bean property is read through {@link BeanUtils#getProperty(Object, String)} and
 * added as a stored Lucene field whose name is the property name.
 *
 * @author weich
 * @Date 2009-10-09
 */
public class DocumentFactory {

    /**
     * Converts a single bean into a Lucene document.
     *
     * <p>The property at index 0 of {@code fields} is indexed without analysis
     * ({@code NOT_ANALYZED}); all remaining properties are indexed with analysis
     * ({@code ANALYZED}). Every value is stored.
     *
     * <p>Properties whose value is {@code null} are left out of the document, because a
     * Lucene {@code Field} cannot be created from a null value. A {@code null} or empty
     * {@code fields} array yields an empty document.
     *
     * @param <T> bean type
     * @param fields reflective fields of the bean class; their names are used as property names
     * @param obj bean to convert
     * @return the document holding one Lucene field per non-null property
     * @throws java.io.FileNotFoundException declared for backward compatibility
     * @throws IllegalAccessException if a property accessor is not accessible
     * @throws InvocationTargetException if a property accessor throws
     * @throws NoSuchMethodException if a property has no accessor method
     */
    public static <T> Document getDataDocument(java.lang.reflect.Field[] fields, T obj)
            throws java.io.FileNotFoundException, IllegalAccessException, InvocationTargetException,
            NoSuchMethodException {

        Document doc = new Document();
        if (fields == null) {
            return doc;
        }
        for (int i = 0; i < fields.length; i++) {
            String name = fields[i].getName();
            String value = BeanUtils.getProperty(obj, name);
            if (value == null) {
                // A Lucene Field rejects null values; skip the property instead of failing.
                continue;
            }
            // Only the leading property is kept as a single untokenized term.
            Field.Index indexMode = (i == 0) ? Field.Index.NOT_ANALYZED : Field.Index.ANALYZED;
            doc.add(new Field(name, value, Field.Store.YES, indexMode));
        }
        return doc;
    }

    /**
     * Converts an array of beans into Lucene documents, one document per bean.
     *
     * <p>The properties are taken from the fields declared directly on {@code cls}; static
     * and synthetic fields are ignored because they are not bean properties. The first
     * remaining field is treated as described in {@link #getDataDocument}.
     * NOTE: {@link Class#getDeclaredFields()} does not guarantee any particular order, so
     * which field comes "first" depends on the JVM.
     *
     * @param <T> bean type
     * @param cls class whose declared fields define the indexed properties
     * @param objs beans to convert; {@code null} or empty yields an empty array
     * @return one document per bean, in the order of {@code objs}
     * @throws java.io.FileNotFoundException declared for backward compatibility
     * @throws IllegalAccessException if a property accessor is not accessible
     * @throws InvocationTargetException if a property accessor throws
     * @throws NoSuchMethodException if a property has no accessor method
     */
    public static <T> Document[] getDataDocuments(Class<T> cls, T[] objs)
            throws java.io.FileNotFoundException, IllegalAccessException, InvocationTargetException,
            NoSuchMethodException {

        if (objs == null || objs.length == 0) {
            return new Document[0];
        }
        java.lang.reflect.Field[] fields = indexableFields(cls);
        List<Document> docs = new ArrayList<Document>(objs.length);
        for (T obj : objs) {
            docs.add(getDataDocument(fields, obj));
        }
        return docs.toArray(new Document[docs.size()]);
    }

    /** Returns the declared fields of {@code cls} minus static and synthetic ones. */
    private static java.lang.reflect.Field[] indexableFields(Class<?> cls) {
        List<java.lang.reflect.Field> kept = new ArrayList<java.lang.reflect.Field>();
        for (java.lang.reflect.Field field : cls.getDeclaredFields()) {
            if (field.isSynthetic() || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            kept.add(field);
        }
        return kept.toArray(new java.lang.reflect.Field[kept.size()]);
    }
}


清单二:IndexManger.java

/*
* @(#)IndexManger.java 2009-10-09
*/
package com.ordinov.lucene;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.LockObtainFailedException;

/**
 * Creates a Lucene index on disk from plain Java beans.
 *
 * <p>An instance opens an {@link IndexWriter} when it is constructed and closes it at the
 * end of {@link #createIndex(Class, Object[])}, so each instance can build an index once.
 *
 * @author weich
 * @Date 2009-10-09
 */
public class IndexManger {

    /** Logger for this class. */
    static private Logger logger = Logger.getLogger(IndexManger.class);
    /** Analyzer handed to the index writer; built from {@link #stopStrs} in {@link #init()}. */
    private StandardAnalyzer analyzer = null;
    /** Writer that adds documents to the index. */
    private IndexWriter writer = null;
    /** {@code true} creates a new index, {@code false} appends to an existing one. Defaults to {@code true}. */
    private boolean isNewCreat = true;
    /** Directory in which the index files are stored. */
    private String indexPath = null;
    /** Stop words passed to the analyzer. */
    private String[] stopStrs = {};

    /**
     * Creates a manager with default settings.
     *
     * <p>No index path is configured by this constructor, so initialization fails with
     * {@link IllegalArgumentException}; use
     * {@link #IndexManger(String, String[], boolean)} to supply a path.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException on a low-level I/O error
     */
    public IndexManger() throws CorruptIndexException, LockObtainFailedException, IOException {
        init();
    }

    /**
     * Creates a manager for the given index directory.
     *
     * @param indexPath directory in which the index files are stored; must not be null or empty
     * @param stopStrs stop words for the analyzer; {@code null} or empty means none
     * @param isCreat {@code true} to create a new index, {@code false} to append
     * @throws IllegalArgumentException if {@code indexPath} is null or empty
     * @throws CorruptIndexException if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException on a low-level I/O error
     */
    public IndexManger(String indexPath, String[] stopStrs, boolean isCreat)
            throws CorruptIndexException, LockObtainFailedException, IOException {

        if (indexPath != null && !"".equals(indexPath)) {
            this.indexPath = indexPath;
        }
        if (stopStrs != null && stopStrs.length > 0) {
            this.stopStrs = stopStrs;
        }
        this.isNewCreat = isCreat;
        init();
    }

    /**
     * Creates the analyzer and opens the index writer.
     *
     * @throws IllegalArgumentException if no index path has been configured
     * @throws CorruptIndexException if the index is corrupt
     * @throws LockObtainFailedException if the index write lock cannot be obtained
     * @throws IOException on a low-level I/O error
     */
    private void init() throws CorruptIndexException, LockObtainFailedException, IOException {

        if (indexPath == null) {
            // Fail with a clear message instead of the NullPointerException from new File(null).
            throw new IllegalArgumentException("indexPath must not be null or empty");
        }
        analyzer = new StandardAnalyzer(stopStrs);
        writer = new IndexWriter(new File(indexPath), analyzer, this.isNewCreat,
                IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Adds every document of the array to the index writer.
     *
     * @param docs documents to add; {@code null} or empty is a no-op
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on a low-level I/O error
     */
    private void addDocs(Document[] docs) throws CorruptIndexException, IOException {

        if (docs == null) {
            return;
        }
        for (Document doc : docs) {
            this.addDoc(doc);
        }
    }

    /**
     * Adds a single document to the index writer.
     *
     * @param doc document to add
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on a low-level I/O error
     */
    private void addDoc(Document doc) throws CorruptIndexException, IOException {

        writer.addDocument(doc);
    }

    /**
     * Optimizes the index and closes the index writer.
     *
     * <p>The writer is closed even when optimizing fails, so the write lock is released.
     *
     * @throws CorruptIndexException if the index is corrupt
     * @throws IOException on a low-level I/O error
     */
    private void close() throws CorruptIndexException, IOException {

        logger.debug("关闭索引写出对象实例...");
        try {
            writer.optimize();
        } finally {
            writer.close();
        }
    }

    /**
     * Discards uncommitted changes and closes the writer after a failed indexing run.
     *
     * <p>A secondary failure is only logged so that the exception that caused the abort
     * reaches the caller.
     */
    private void abort() {

        try {
            writer.rollback();
        } catch (IOException e) {
            logger.error("Failed to roll back the index writer", e);
        }
    }

    /**
     * Indexes the given beans and closes the index writer.
     *
     * <p>The beans are converted with {@link DocumentFactory#getDataDocuments(Class, Object[])}.
     * On success the index is optimized and the writer closed; if conversion or adding
     * fails, the writer is rolled back and closed so the index lock is not left behind.
     * Because the writer is closed either way, this method can be called once per instance.
     *
     * @param <T> bean type
     * @param cls class whose declared fields define the indexed properties
     * @param objs beans to index
     * @throws CorruptIndexException if the index is corrupt
     * @throws FileNotFoundException declared by the document conversion
     * @throws IOException on a low-level I/O error
     * @throws IllegalAccessException if a property accessor is not accessible
     * @throws InvocationTargetException if a property accessor throws
     * @throws NoSuchMethodException if a property has no accessor method
     */
    public <T> void createIndex(Class<T> cls, T[] objs) throws CorruptIndexException, FileNotFoundException,
            IOException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {

        boolean indexed = false;
        try {
            this.addDocs(DocumentFactory.getDataDocuments(cls, objs));
            indexed = true;
        } finally {
            if (!indexed) {
                this.abort();
            }
        }
        this.close();
    }
}


清单三:SerchIndex.java
package com.ordinov.lucene;

import java.io.StringReader;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.beanutils.BeanUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

/**
 * Runs paged keyword searches over one or more Lucene indexes and maps the hits back
 * onto Java beans.
 */
public class SerchIndex {

    /**
     * Searches every declared, non-static property of {@code cls} for {@code keyword} and
     * returns one page of results as beans.
     *
     * <p>Each stored field value of a hit is copied onto a new bean instance. Where the
     * highlighter finds the keyword, the value is replaced by the best fragment with the
     * match wrapped in {@code <font color='red'>...</font>}; otherwise the stored value is
     * used unchanged. A page beyond the last hit yields an empty list, and the last page
     * may hold fewer than {@code rowCount} beans.
     *
     * <p>All index readers opened by this call are closed before it returns, including
     * when the search fails.
     *
     * @param <T> bean type to populate
     * @param cls bean class; must have an accessible no-arg constructor
     * @param keyword query text applied to every searched field
     * @param rowCount number of records per page; must be at least 1
     * @param current 1-based page number; must be at least 1
     * @param indexPaths index directories to search; {@code null} or empty yields an empty list
     * @return the beans of the requested page, possibly empty, never {@code null}
     * @throws IllegalArgumentException if {@code rowCount} or {@code current} is less than 1
     * @throws Exception if opening, querying or reading an index fails, or a bean cannot be
     *         created or populated
     */
    public <T> List<T> initSearch(Class<T> cls, String keyword, int rowCount, int current,
            String... indexPaths) throws Exception {

        List<T> objs = new ArrayList<T>();
        if (indexPaths == null || indexPaths.length <= 0) {
            return objs;
        }
        if (rowCount < 1 || current < 1) {
            throw new IllegalArgumentException(
                    "rowCount and current must be >= 1 but were " + rowCount + " and " + current);
        }

        List<Field> fields = searchableFields(cls);
        /* Columns the keyword is matched against, and the query text for each column. */
        String[] colmuns = new String[fields.size()];
        String[] keyWords = new String[fields.size()];
        for (int i = 0; i < colmuns.length; i++) {
            colmuns[i] = fields.get(i).getName();
            keyWords[i] = keyword;
        }

        /* Computed in long so that a large page number cannot overflow int. */
        long firstHit = (long) (current - 1) * rowCount;
        int wantedHits = (int) Math.min((long) current * rowCount, (long) Integer.MAX_VALUE);

        Analyzer analyzer = new StandardAnalyzer();
        List<IndexReader> readers = new ArrayList<IndexReader>(indexPaths.length);
        MultiSearcher multisearcher = null;
        try {
            IndexSearcher[] searchers = new IndexSearcher[indexPaths.length];
            for (int i = 0; i < indexPaths.length; i++) {
                IndexReader reader = IndexReader.open(indexPaths[i]);
                readers.add(reader);
                searchers[i] = new IndexSearcher(reader);
            }
            multisearcher = new MultiSearcher(searchers);
            Query query = MultiFieldQueryParser.parse(keyWords, colmuns, analyzer);

            /* Collect every hit up to the end of the requested page, not just one page worth. */
            TopDocCollector collector = new TopDocCollector(wantedHits);
            multisearcher.search(query, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            /* The highlighter does not depend on the hit, so it is built once. */
            SimpleHTMLFormatter sHtmlF = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Highlighter highlighter = new Highlighter(sHtmlF, new QueryScorer(query));
            highlighter.setTextFragmenter(new SimpleFragmenter(100));

            for (long i = firstHit; i < hits.length; i++) {
                Document doc = multisearcher.doc(hits[(int) i].doc);
                T obj = cls.newInstance();
                for (Field field : fields) {
                    String name = field.getName();
                    String str = doc.get(name);
                    if (str == null || "".equals(str)) {
                        continue;
                    }
                    TokenStream tokenStream = analyzer.tokenStream(name, new StringReader(str));
                    String value = highlighter.getBestFragment(tokenStream, str);
                    /* Fall back to the stored text when the keyword does not occur in this field. */
                    if (value != null && !"".equals(value)) {
                        BeanUtils.setProperty(obj, name, value);
                    } else {
                        BeanUtils.setProperty(obj, name, str);
                    }
                }
                objs.add(obj);
            }
        } finally {
            try {
                if (multisearcher != null) {
                    multisearcher.close();
                }
            } finally {
                /* Searchers built from a reader do not close it, so close the readers here. */
                closeReaders(readers);
            }
        }
        return objs;
    }

    /** Returns the declared fields of {@code cls} minus static and synthetic ones. */
    private static List<Field> searchableFields(Class<?> cls) {
        List<Field> kept = new ArrayList<Field>();
        for (Field field : cls.getDeclaredFields()) {
            if (field.isSynthetic() || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            kept.add(field);
        }
        return kept;
    }

    /** Closes every reader, then rethrows the first failure, if any. */
    private static void closeReaders(List<IndexReader> readers) throws java.io.IOException {
        java.io.IOException firstFailure = null;
        for (IndexReader reader : readers) {
            try {
                reader.close();
            } catch (java.io.IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                }
            }
        }
        if (firstFailure != null) {
            throw firstFailure;
        }
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值