Hibernate Search 和 Lucene 结合使用实例

以下的代码是根据api帮助文档作出的一个简单实例,在应用方面可以实现创建索引,搜索,过滤和高亮的功能。

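整体的环境为:Spring 2.5.6、Hibernate 3.3.1、Struts 2.0.8、Lucene 2.4.1(注意:下文 service 代码用到了 Version、TermRangeQuery、TopScoreDocCollector、FSDirectory.open 等 Lucene 2.9 才提供的 API,实际编译运行需要 Lucene 2.9.x)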

第一步,首先是web.xml配置文件,由于使用了ssh2的架构,所以不得不在web.xml里配置一些东西

<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.5" xmlns="http://java.sun.com/xml/ns/javaee"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://java.sun.com/xml/ns/javaee
    http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">

    <!-- Location of the Spring configuration files -->
    <context-param>
        <param-name>contextConfigLocation</param-name>
        <param-value>classpath*:spring/*.xml</param-value>
    </context-param>

    <!--
        Spring character-encoding filter (forces UTF-8).
        Filters run in the order of their filter-mapping elements, so this
        mapping is placed first: the request encoding has to be set before
        any other filter (Open-Session-in-View, Struts2) touches the request
        parameters.
    -->
    <filter>
        <filter-name>encodingFilter</filter-name>
        <filter-class>
            org.springframework.web.filter.CharacterEncodingFilter
        </filter-class>
        <init-param>
            <param-name>encoding</param-name>
            <param-value>UTF-8</param-value>
        </init-param>
    </filter>
    <filter-mapping>
        <filter-name>encodingFilter</filter-name>
        <url-pattern>/*</url-pattern>
    </filter-mapping>

    <!-- Hibernate Open Session in View filter -->
    <filter>
        <filter-name>hibernateFilter</filter-name>
        <filter-class>
            org.springframework.orm.hibernate3.support.OpenSessionInViewFilter
        </filter-class>
    </filter>
    <filter-mapping>
        <filter-name>hibernateFilter</filter-name>
        <url-pattern>*.action</url-pattern>
        <dispatcher>REQUEST</dispatcher>
        <dispatcher>FORWARD</dispatcher>
    </filter-mapping>
    <filter-mapping>
        <filter-name>hibernateFilter</filter-name>
        <url-pattern>*.jsp</url-pattern>
        <dispatcher>REQUEST</dispatcher>
        <dispatcher>FORWARD</dispatcher>
    </filter-mapping>

    <!-- Bootstraps the Spring root application context -->
    <listener>
        <listener-class>
            org.springframework.web.context.ContextLoaderListener
        </listener-class>
    </listener>

    <!-- Spring listener that flushes the JavaBeans Introspector cache to prevent memory leaks -->
    <listener>
        <listener-class>
            org.springframework.web.util.IntrospectorCleanupListener
        </listener-class>
    </listener>

    <!-- Struts2 filters and their mappings -->
    <filter>
        <filter-name>struts-cleanup</filter-name>
        <filter-class>
            org.apache.struts2.dispatcher.ActionContextCleanUp
        </filter-class>
    </filter>
    <filter>
        <filter-name>struts2</filter-name>
        <filter-class>
            org.apache.struts2.dispatcher.FilterDispatcher
        </filter-class>
    </filter>

    <filter-mapping>
        <filter-name>struts-cleanup</filter-name>
        <url-pattern>/*</url-pattern>
    </filter-mapping>
    <filter-mapping>
        <filter-name>struts2</filter-name>
        <url-pattern>*.jsp</url-pattern>
        <dispatcher>REQUEST</dispatcher>
        <dispatcher>FORWARD</dispatcher>
    </filter-mapping>
    <filter-mapping>
        <filter-name>struts2</filter-name>
        <url-pattern>*.action</url-pattern>
        <dispatcher>REQUEST</dispatcher>
        <dispatcher>FORWARD</dispatcher>
    </filter-mapping>

    <!-- Loaded at server start-up so that the servlet can build the index files -->
    <servlet>
        <servlet-name>CreateHibernateIndex</servlet-name>
        <servlet-class>com.test.servlet.CreateHibernateIndex</servlet-class>
        <load-on-startup>20</load-on-startup>
    </servlet>
    <servlet-mapping>
        <servlet-name>CreateHibernateIndex</servlet-name>
        <url-pattern>/servlet/CreateHibernateIndex</url-pattern>
    </servlet-mapping>

    <!-- Session timeout, in minutes -->
    <session-config>
        <session-timeout>20</session-timeout>
    </session-config>

    <!-- Default welcome page (a partial URL: no leading or trailing slash) -->
    <welcome-file-list>
        <welcome-file>index.jsp</welcome-file>
    </welcome-file-list>

</web-app>

第二步,配spring配置文件和hibernate文件

下面是支持 Hibernate Annotation 的 sessionFactory 属性配置的一部分。注意其中两个与索引相关的属性:directory_provider 指定索引的存储方式(FSDirectoryProvider 为文件索引,另一种 RAMDirectoryProvider 为内存索引),indexBase 指定索引文件的保存路径。
<!-- Keep the Lucene index on the file system -->
<prop key="hibernate.search.default.directory_provider">org.hibernate.search.store.FSDirectoryProvider</prop>
<!-- Base directory of the index files; the value comes from a ${...} placeholder (presumably resolved from a properties file - verify) -->
<prop key="hibernate.search.default.indexBase">${hibernate.search.default.indexBase}</prop>


spring的配置文件没有什么特别的,和普通ssh配置没有什么两样

第三步配struts配置文件,由于也是普通配置,没有特别之处,就不贴出来了。

第四步,写实体类,由于采用hibernate search方法搜索,所以直接利用hibernate annotation注释去定义索引的一些配置信息。关于index的基本都属于索引的配置

package com.test.model;

import static javax.persistence.GenerationType.IDENTITY;

import java.util.Date;

import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import javax.persistence.Transient;

import org.hibernate.search.annotations.Analyzer;
import org.hibernate.search.annotations.DateBridge;
import org.hibernate.search.annotations.DocumentId;
import org.hibernate.search.annotations.Field;
import org.hibernate.search.annotations.Index;
import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.Resolution;
import org.hibernate.search.annotations.Store;
import org.wltea.analyzer.lucene.IKAnalyzer;


/**
 * Product entity mapped to the {@code product} table of the
 * {@code hibernate_search_test} catalog.
 *
 * <p>The class is also a Hibernate Search document: it is stored in the index
 * named {@code Product} and analyzed with {@link IKAnalyzer}. Title and
 * description are indexed as the tokenized fields {@code pt} and {@code pd};
 * the product date is indexed un-tokenized as field {@code t} with day
 * resolution. All three index fields are stored.
 */
@Entity
@Table(name = "product", catalog = "hibernate_search_test")
@Indexed(index = "Product")
@Analyzer(impl = IKAnalyzer.class)
public class Product implements java.io.Serializable {

    private static final long serialVersionUID = -7005490272739421758L;

    // Persistent state
    private Integer id;
    private String proTitle;
    private String proDescn;
    private String proPrice;
    private Integer proType;
    private Date proTime;

    // Not persisted: carries the highlighted search snippet
    private String findResult;

    /** No-argument constructor. */
    public Product() {
    }

    /** @return the primary key, which is also used as the index document id */
    @Id
    @DocumentId
    @GeneratedValue(strategy = IDENTITY)
    @Column(name = "id")
    public Integer getId() {
        return id;
    }

    public void setId(Integer id) {
        this.id = id;
    }

    /** @return the product title (column {@code pro_title}, index field {@code pt}) */
    @Column(name = "pro_title")
    @Field(name = "pt", index = Index.TOKENIZED, store = Store.YES)
    public String getProTitle() {
        return proTitle;
    }

    public void setProTitle(String proTitle) {
        this.proTitle = proTitle;
    }

    /** @return the product description (column {@code pro_descn}, index field {@code pd}) */
    @Column(name = "pro_descn")
    @Field(name = "pd", index = Index.TOKENIZED, store = Store.YES)
    public String getProDescn() {
        return proDescn;
    }

    public void setProDescn(String proDescn) {
        this.proDescn = proDescn;
    }

    /** @return the product price (column {@code pro_price}); not indexed */
    @Column(name = "pro_price")
    public String getProPrice() {
        return proPrice;
    }

    public void setProPrice(String proPrice) {
        this.proPrice = proPrice;
    }

    /** @return the product type (column {@code pro_type}); not indexed */
    @Column(name = "pro_type")
    public Integer getProType() {
        return proType;
    }

    public void setProType(Integer proType) {
        this.proType = proType;
    }

    /** @return the product date (column {@code pro_time}, index field {@code t}, day resolution) */
    @Temporal(TemporalType.DATE)
    @Column(name = "pro_time")
    @Field(name = "t", index = Index.UN_TOKENIZED, store = Store.YES)
    @DateBridge(resolution = Resolution.DAY)
    public Date getProTime() {
        return proTime;
    }

    public void setProTime(Date proTime) {
        this.proTime = proTime;
    }

    /** @return the highlighted snippet produced by a search; transient, never persisted */
    @Transient
    public String getFindResult() {
        return findResult;
    }

    public void setFindResult(String findResult) {
        this.findResult = findResult;
    }
}

第六步,写service方法,包括建索引,根据关键字用索引查,过滤,设置权重,高亮等等工作
package com.test.service;

import java.io.File;
import java.io.StringReader;
import java.util.Date;
import java.util.List;

import javax.annotation.Resource;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.hibernate.CacheMode;
import org.hibernate.FlushMode;
import org.hibernate.ScrollMode;
import org.hibernate.ScrollableResults;
import org.hibernate.search.FullTextQuery;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springside.modules.orm.hibernate.HibernateDao;
import org.springside.modules.service.EntityManager;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.test.dao.ProductDao;
import com.test.model.Product;

@Transactional
@Service
public class ProductService extends EntityManager<Product, Integer> {
@Resource(name = "productDao")
private ProductDao productDao;

/**
 * Supplies the DAO that the inherited entity-manager operations work on.
 *
 * @return the injected {@code productDao}
 */
@Override
protected HibernateDao<Product, Integer> getEntityDao() {
    return this.productDao;
}

/**
 * Searches the Hibernate Search index of {@link Product} by keyword.
 *
 * <p>The keywords are parsed with {@link IKAnalyzer} against the title field
 * ({@code pt}, boost 1.5) and the description field ({@code pd}, boost 1.0);
 * a hit in either field is enough. When both date bounds are supplied the
 * result is additionally restricted to the inclusive range on field {@code t}.
 * Each returned product gets a highlighted snippet in
 * {@link Product#setFindResult(String)}: taken from the title if it matches,
 * otherwise from the description, otherwise {@code null}.
 *
 * @param words     keywords in Lucene query-parser syntax
 * @param startDate lower bound of the date filter, or {@code null}/empty for no filter;
 *                  NOTE(review): must use the same string form as the indexed {@code t}
 *                  field (day resolution, e.g. {@code 20090927}) - confirm with callers
 * @param endDate   upper bound of the date filter, or {@code null}/empty for no filter
 * @return the matching products
 * @throws Exception if the keywords cannot be parsed or highlighting fails
 */
@SuppressWarnings("unchecked")
public List<Product> QueryByIndex(String words, String startDate, String endDate) throws Exception {
    FullTextSession fullTextSession = Search.createFullTextSession(productDao.getSession());
    Analyzer analyzer = new IKAnalyzer();

    // One parsed query per field so that each field can carry its own boost.
    Query titleQuery = new QueryParser(Version.LUCENE_CURRENT, "pt", analyzer).parse(words);
    titleQuery.setBoost(1.5f);
    Query descnQuery = new QueryParser(Version.LUCENE_CURRENT, "pd", analyzer).parse(words);
    descnQuery.setBoost(1.0f);

    BooleanQuery keywordQuery = new BooleanQuery();
    keywordQuery.add(titleQuery, Occur.SHOULD);
    keywordQuery.add(descnQuery, Occur.SHOULD);

    FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(keywordQuery, Product.class);

    // The date filter applies only when both bounds are present; null is
    // treated like the empty string instead of raising a NullPointerException.
    boolean hasDateRange = startDate != null && endDate != null
            && startDate.length() > 0 && endDate.length() > 0;
    if (hasDateRange) {
        // Only a single filter can be set on the query, so every filter
        // condition is collected in one BooleanQuery and wrapped once.
        BooleanQuery filterQuery = new BooleanQuery();
        filterQuery.add(new TermRangeQuery("t", startDate, endDate, true, true), BooleanClause.Occur.MUST);
        Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(filterQuery));
        fullTextQuery.setFilter(filter);
    }

    List<Product> result = fullTextQuery.list();

    // Highlighting template: wrap every matched term in a pair of bold tags.
    Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
    Highlighter titleHighlighter = new Highlighter(formatter, new QueryScorer(titleQuery));
    Highlighter descnHighlighter = new Highlighter(formatter, new QueryScorer(descnQuery));
    // Description snippets are cut into fragments of 30 characters.
    descnHighlighter.setTextFragmenter(new SimpleFragmenter(30));

    for (Product product : result) {
        // Null text is skipped: the highlighter cannot tokenize it.
        String title = product.getProTitle();
        String fragment = (title == null) ? null : titleHighlighter.getBestFragment(analyzer, "pt", title);
        if (fragment == null) {
            String descn = product.getProDescn();
            if (descn != null) {
                fragment = descnHighlighter.getBestFragment(analyzer, "pd", descn);
            }
        }
        product.setFindResult(fragment);
    }
    return result;
}

/**
 * Rebuilds the {@link Product} index through the Hibernate Search API.
 *
 * <p>All products are scrolled forward-only and handed to
 * {@link FullTextSession#index(Object)}; the session is cleared every
 * {@code BATCH_SIZE} entities. The scroll cursor is always closed and the
 * session's flush and cache modes are restored afterwards, because the
 * session comes from the DAO and may be used again by the caller.
 *
 * <p>NOTE(review): the original author left {@code flushToIndexes()} and the
 * explicit transaction disabled; the queued index work is therefore presumably
 * applied when the surrounding transaction commits - confirm for the
 * Hibernate Search version in use.
 */
public void createIndexByHibernateSearch() {
    final int BATCH_SIZE = 1000;
    long startTime = new Date().getTime();
    FullTextSession s = Search.createFullTextSession(productDao.getSession());

    // Remember the current modes so they can be put back once indexing is done.
    FlushMode previousFlushMode = s.getFlushMode();
    CacheMode previousCacheMode = s.getCacheMode();
    s.setFlushMode(FlushMode.MANUAL);
    s.setCacheMode(CacheMode.IGNORE);
    try {
        ScrollableResults results = s.createQuery("from Product")
                .setFetchSize(BATCH_SIZE)
                .scroll(ScrollMode.FORWARD_ONLY);
        try {
            int index = 0;
            while (results.next()) {
                index++;
                s.index(results.get(0)); // index each element
                if (index % BATCH_SIZE == 0) {
                    s.clear(); // drop the entities loaded for this batch
                }
            }
        } finally {
            results.close(); // release the underlying cursor even on failure
        }
        s.clear();
    } finally {
        s.setFlushMode(previousFlushMode);
        s.setCacheMode(previousCacheMode);
    }
    long endTime = new Date().getTime();
    logger.warn("建立Product索引 , 这花费了" + (endTime - startTime) + " 毫秒来把文档增加到索引里面去!");
}

/**
 * Rebuilds the product index with the plain Lucene API.
 *
 * <p>An index created this way can only be searched with the Lucene API as
 * well (see {@code SearchByLucene}). The index under
 * {@code E:\indexes\product} is recreated from scratch with the fields
 * {@code pro_title} and {@code pro_descn} (analyzed, stored) and
 * {@code pro_time} (day resolution, not analyzed, stored). A field whose
 * value is {@code null} is left out of the document.
 *
 * <p>Products are loaded before the writer is opened, so a failing database
 * query does not wipe the existing index, and the writer is always closed so
 * that no write lock is left behind. Failures are printed, not rethrown.
 */
@SuppressWarnings("deprecation")
public void createIndexByLucene() {
    try {
        List<Product> productList = find("from Product");
        int size = productList.size();

        File fsDir = new File("E:\\indexes\\product");
        Analyzer analyzer = new IKAnalyzer();
        IndexWriter fsWriter = new IndexWriter(
                FSDirectory.open(fsDir),
                analyzer,
                true, // create: replaces any existing index
                IndexWriter.MaxFieldLength.UNLIMITED);

        long startTime = new Date().getTime();
        try {
            fsWriter.setMaxBufferedDocs(1000);
            fsWriter.setMergeFactor(1000);

            for (Product product : productList) {
                Document doc = new Document();
                // Lucene rejects null field values, so absent values are skipped.
                if (product.getProTitle() != null) {
                    doc.add(new Field("pro_title", product.getProTitle(), Field.Store.YES, Field.Index.ANALYZED));
                }
                if (product.getProDescn() != null) {
                    doc.add(new Field("pro_descn", product.getProDescn(), Field.Store.YES, Field.Index.ANALYZED));
                }
                if (product.getProTime() != null) {
                    doc.add(new Field("pro_time", DateTools.dateToString(product.getProTime(), Resolution.DAY),
                            Field.Store.YES, Field.Index.NOT_ANALYZED));
                }
                fsWriter.addDocument(doc);
            }
            // Merge the index segments once all documents are added.
            fsWriter.optimize();
        } finally {
            // Always release the writer and its write lock.
            fsWriter.close();
        }

        long endTime = new Date().getTime();
        System.out.println("一共" + size + ",这花费了" + (endTime - startTime)
                + " 毫秒来把文档增加到索引里面去!");
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/**
 * Demonstrates searching with the plain Lucene API.
 *
 * <p>The index is first rebuilt via {@code createIndexByLucene()} and then
 * read from the same directory that method writes to
 * ({@code E:\indexes\product}). The fixed keyword is searched in
 * {@code pro_title} (boost 1.5) and {@code pro_descn} (boost 1.0), restricted
 * to {@code pro_time} between 20090101 and 20091101 inclusive. For each of
 * the top 100 hits the highlighted title fragment is printed, followed by
 * the number of hits. Failures are printed, not rethrown.
 */
public void SearchByLucene() {
    createIndexByLucene();
    // Must be the directory createIndexByLucene() writes to.
    File fsDir = new File("E:\\indexes\\product");
    Analyzer analyzer = new IKAnalyzer();
    String keyword = "大灯"; // search term

    IndexReader reader = null;
    IndexSearcher isearcher = null;
    try {
        reader = IndexReader.open(FSDirectory.open(fsDir), true); // only searching, so read-only=true
        isearcher = new IndexSearcher(reader);

        Query titleQuery = new QueryParser(Version.LUCENE_CURRENT, "pro_title", analyzer).parse(keyword);
        titleQuery.setBoost(1.5f);
        Query descnQuery = new QueryParser(Version.LUCENE_CURRENT, "pro_descn", analyzer).parse(keyword);
        descnQuery.setBoost(1.0f);

        BooleanQuery booleanQuery = new BooleanQuery();
        booleanQuery.add(titleQuery, Occur.SHOULD);
        booleanQuery.add(descnQuery, Occur.SHOULD);

        // Wrap the date-range condition in a filter.
        BooleanQuery filterBooleanQuery = new BooleanQuery();
        filterBooleanQuery.add(new TermRangeQuery("pro_time", "20090101", "20091101", true, true),
                BooleanClause.Occur.MUST);
        Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(filterBooleanQuery));

        TopScoreDocCollector collector = TopScoreDocCollector.create(100, true);
        isearcher.search(booleanQuery, filter, collector);
        ScoreDoc[] hits = collector.topDocs(0, 100).scoreDocs;

        // Score with the analyzed title query so that the highlighted terms are
        // exactly the terms that were searched; built once, reused per hit.
        Formatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
        Highlighter hl = new Highlighter(formatter, new QueryScorer(titleQuery));

        for (ScoreDoc h : hits) {
            Document d = isearcher.doc(h.doc);
            String text = d.get("pro_title");
            // Documents without a stored title have nothing to highlight.
            System.out.println(text == null ? null : hl.getBestFragment(analyzer, "pro_title", text));
        }
        System.out.println("命中:" + hits.length);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // A searcher built on an existing reader does not close that reader,
        // so both are closed explicitly.
        if (isearcher != null) {
            try {
                isearcher.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}

/**
 * Prints the tokens an analyzer produces for a text, separated by single
 * spaces and terminated by a line break, to standard output.
 *
 * @param analyzer the analyzer to inspect
 * @param s        the text to tokenize (it is also passed as the field name)
 * @throws Exception if tokenizing fails
 */
@SuppressWarnings("deprecation")
public static void showAnalyzerResult(Analyzer analyzer, String s)
        throws Exception {
    TokenStream tokens = analyzer.tokenStream(s, new StringReader(s));
    for (Token token = tokens.next(); token != null; token = tokens.next()) {
        System.out.print(token.termText() + " ");
    }
    System.out.println();
}

/**
 * Stand-alone entry point: starts the Spring context from
 * {@code spring/applicationContext.xml}, runs the Lucene search demo on the
 * {@code productService} bean and shuts the context down again.
 *
 * @param args unused
 */
public static void main(String[] args) {
    ClassPathXmlApplicationContext ctx = new ClassPathXmlApplicationContext("spring/applicationContext.xml");
    try {
        ProductService service = (ProductService) ctx.getBean("productService");
        service.SearchByLucene();
    } finally {
        // Close the context so that the beans it created are destroyed.
        ctx.close();
    }
}
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值