Lucene直接和Hibernate整合

1、配置Hibernate拦截器

<!-- Hibernate interceptor bean; it is referenced by the sessionFactory's entityInterceptor property and is used to keep the Lucene index in sync -->
 <bean id="luceneEntityInterceptor" class="com.zrar.cms.service.lucene.impl.LuceneEntityInterceptor" lazy-init="false"/>
 
<!-- Hibernate configuration: annotation-based session factory scanning com.zrar.cms.entity -->
 <bean id="sessionFactory"
  class="org.springframework.orm.hibernate3.annotation.AnnotationSessionFactoryBean">
   <!-- Register the Hibernate interceptor that synchronizes the Lucene index -->
    <property name="entityInterceptor" ref="luceneEntityInterceptor" />
  <property name="dataSource" ref="dataSource" />
  <property name="lobHandler" ref="lobHandler" />
  <property name="namingStrategy">
   <bean class="org.hibernate.cfg.ImprovedNamingStrategy" />
  </property>
  <property name="hibernateProperties">
   <props>
    <prop key="hibernate.dialect">${hibernate.dialect}</prop>
    <prop key="hibernate.show_sql">${hibernate.show_sql}</prop>
    <prop key="hibernate.format_sql">${hibernate.format_sql}</prop>
   </props>
  </property>
  <property name="packagesToScan" value="com.zrar.cms.entity" />
 </bean>

这个拦截器用于同步索引。也可以在 save 时(onSave 回调)创建索引,但因为我只在文章状态为“发布”时才创建索引,所以没有实现 onSave。

public class LuceneEntityInterceptor extends EmptyInterceptor {
 @Autowired
 @Qualifier("contentLuceneServiceImpl")
 private ContentLuceneService contentLuceneService;
 private static final long serialVersionUID = 7319416231145791577L;
 

/*public boolean onSave(
   Object entity,
   Serializable id,
   Object[] state,
   String[] propertyNames,
   Type[] types) {
  return false;
 }*/
 // 更新数据时回调此方法
 public boolean onFlushDirty(final Object entity, Serializable id, Object[] currentState, Object[] previousState, String[] propertyNames, Type[] types) {
  super.onFlushDirty(entity, id, currentState, previousState, propertyNames, types);
   if(entity instanceof Content){
    if(((Content)entity).getContentStatus().compareTo(DictionaryStaticValues.CONTENT_STATUS_PUBLISH)==0){
     //更新索引
     new Thread(){
      public void run(){
       contentLuceneService.saveIndex(((Content)entity));
      }
     }.start();
    }
    else{//删除该索引
     final String contentid = ((Content)entity).getContentId();
     new Thread(){
      public void run(){
       contentLuceneService.deleteIndex(contentid);
      }
     }.start();
    }
  }
  return true;
 }

 // 删除
 public void onDelete(Object entity, final Serializable id, Object[] state, String[] propertyNames, Type[] types) {
  super.onDelete(entity, id, state, propertyNames, types);
  if(entity instanceof Content){
   new Thread(){
    public void run(){
     contentLuceneService.deleteIndex(id.toString());
    }
   }.start();
  }
 }

 

2、文章中去掉HTML标记的工具类,采用的是HTMLParser

public class HtmlUtil {
 /**
  * 去掉HTML标记
  * @param content
  * @return
  */
 public static String getContentText(String content) {
  if(content==null){
   return "";
  }
  try {
   Parser parser = Parser.createParser(content, "UTF-8");
   TextExtractingVisitor textExtractingVisitor = new TextExtractingVisitor();
   parser.visitAllNodesWith(textExtractingVisitor);
   return textExtractingVisitor.getExtractedText();
  } catch (ParserException e) {
   e.printStackTrace();
   return content;
  }
 }
}

 

 

分词采用的是 IKAnalyzer,下面是分词工具类

public class AnalyzerUtil {
 private static Analyzer analyzer;
 public static Analyzer getIkAnalyzer(){
  if(analyzer==null){
   //当为true时,分词器迚行最大词长切分 ;当为false时,分词器迚行最细粒度切
   analyzer = new IKAnalyzer(true);
  }
  return analyzer;
 }

}

 

然后是索引操作的serviceImpl类


import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

import com.zrar.cms.constant.DictionaryStaticValues;
import com.zrar.cms.dao.content.ContentDao;
import com.zrar.cms.entity.Category;
import com.zrar.cms.entity.Content;
import com.zrar.cms.entity.Topic;
import com.zrar.cms.service.lucene.ContentLuceneService;
import com.zrar.cms.util.AnalyzerUtil;
import com.zrar.cms.util.FilePathSptUtil;
import com.zrar.cms.util.HtmlUtil;
import com.zrar.cms.vo.content.ContentBean;
import com.zrar.dao.util.Page;

@Service
@Transactional
public class ContentLuceneServiceImpl implements ContentLuceneService {
 private final static Logger logger = LoggerFactory.getLogger(ContentLuceneServiceImpl.class);
 // DAO used by createIndexs to count and page through content.
 @Autowired
 @Qualifier("contentDaoImpl")
 private ContentDao contentDao;

 // Index fields that likeSearch matches keywords against.
 private String[] fileds = new String[]{ "contentTitle", "contentContext","keywords"};
 
 // One clause flag per entry of the searched fields array; every field is optional (SHOULD).
 private Occur[] occurs =  new Occur[] { Occur.SHOULD, Occur.SHOULD,Occur.SHOULD };

 // Page size used by createIndexs when reading content in batches.
 private int maxIndexCount = 200;

 public boolean deleteIndex(String contentId) {
  IndexWriter indexWriter = null;
  Directory d = null;
  try {
   d = FSDirectory.open(new File(FilePathSptUtil.LUCENE_INDEX));
   while(d!=null&&IndexWriter.isLocked(d)){//如果文件锁住,等待解锁
    Thread.sleep(1000);
    logger.error("索引已经锁住,正在等待....");
   }
   IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_30, AnalyzerUtil.getIkAnalyzer());
   indexWriter = new IndexWriter(d, indexWriterConfig);
   Term term = new Term("id", contentId);
   indexWriter.deleteDocuments(term);
   indexWriter.optimize();
   indexWriter.commit();
   logger.debug("删除文章ID:{}的索引...",contentId);
   logger.debug("共有索引{}个", indexWriter.numDocs());
   indexWriter.close();
   return true;
  } catch (CorruptIndexException e) {
   e.printStackTrace();
   logger.error("索引删除异常", e);
  } catch (LockObtainFailedException e) {
   e.printStackTrace();
   logger.error("索引删除异常", e);
  } catch (IOException e) {
   e.printStackTrace();
   logger.error("索引不存在", e);
  } catch (Exception e) {
   e.printStackTrace();
   logger.error("索引删除异常", e);
  } finally {
   if (indexWriter != null) {
    try {
     indexWriter.close();
    } catch (CorruptIndexException e) {
     e.printStackTrace();
     logger.error("索引关闭异常", e);
    } catch (IOException e) {
     e.printStackTrace();
     logger.error("索引关闭异常", e);
    }
    finally{
     try {
      if (d!=null&&IndexWriter.isLocked(d)) {    
       IndexWriter.unlock(d);  
      }
     } catch (IOException e) {
      e.printStackTrace();
      logger.error("解锁异常", e);
     }
    }
   }
  }
  return false;
 }

 
 public boolean saveIndex(Content c) {
  IndexWriter indexWriter = null;
  Directory d = null;
  try {
   d= FSDirectory.open(new File(FilePathSptUtil.LUCENE_INDEX));
   while(d!=null&&IndexWriter.isLocked(d)){//如果文件锁住,等待解锁
    Thread.sleep(1000);
    logger.error("索引已经锁住,正在等待....");
   }
   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, AnalyzerUtil.getIkAnalyzer());
   indexWriter = new IndexWriter(d, conf);
   Term term = new Term("id", c.getContentId());
   indexWriter.deleteDocuments(term);//不管更新与否,先删除原来的
   
   Document doc = getDocument(c);
   indexWriter.addDocument(doc);
   indexWriter.optimize();
   indexWriter.commit();
   logger.debug("更新索引,文章ID为{}",c.getContentId());
   logger.debug("共有索引{}个", indexWriter.numDocs());
   return true;
  }  catch (CorruptIndexException e) {
   e.printStackTrace();
   logger.error("索引添加异常", e);
  } catch (LockObtainFailedException e) {
   e.printStackTrace();
   logger.error("索引添加异常", e);
  } catch (IOException e) {
   e.printStackTrace();
   logger.error("索引不存在", e);
  } catch (Exception e) {
   e.printStackTrace();
   logger.error("索引添加异常", e);
  } finally {
   if (indexWriter != null) {
    try {
     indexWriter.close();
    } catch (CorruptIndexException e) {
     e.printStackTrace();
     logger.error("索引关闭异常", e);
    } catch (IOException e) {
     e.printStackTrace();
     logger.error("索引关闭异常", e);
    }
    finally{
     try {
      if (d!=null&&IndexWriter.isLocked(d)) {    
       IndexWriter.unlock(d);  
      }
     } catch (IOException e) {
      e.printStackTrace();
      logger.error("解锁异常", e);
     }
    }
   }
  }
  return false;
 }

 public synchronized boolean createIndexs(boolean isNew) {
  File file = new File(FilePathSptUtil.LUCENE_INDEX);
  if (file.exists() && isNew) {
   if(file.isFile()){
    file.delete();
   }
   else{
    File[] f = file.listFiles();
    for(int i=0;i<f.length;i++){
     f[i].delete();
    }
   }
  } else {
   file.mkdirs();
  }
  IndexWriter indexWriter = null;
  Directory d = null;
  try {
   d = FSDirectory.open(new File(FilePathSptUtil.LUCENE_INDEX));
   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, AnalyzerUtil.getIkAnalyzer());
   // 创建索引模式:CREATE,覆盖模式; APPEND,追加模式
   if (isNew) {
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
   } else {
    conf.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
   }
   indexWriter = new IndexWriter(d, conf);
   ContentBean bean = new ContentBean();
   bean.setContentStatus(DictionaryStaticValues.CONTENT_STATUS_PUBLISH);
   int count = contentDao.count(bean);
   int i = 0;
   Page<Content> page = new Page<Content>();
   page.setPageSize(maxIndexCount);
   List<Content> list = null;
   while (count > 0) {
    page.setPageNo(i++);
    list = contentDao.findByPage(page, bean);
    for (Content c : list) {
     Document doc = getDocument(c);
     indexWriter.addDocument(doc);
    }
    count = count - maxIndexCount;
    list.clear();
    contentDao.clear();
    logger.debug("当前共有索引{}个", indexWriter.numDocs());
    Thread.sleep(100);
   }
   logger.debug("索引结束,共有索引{}个", indexWriter.numDocs());
   // 自动优化合并索引文件
   indexWriter.optimize();
   indexWriter.commit();
   return true;
  } catch (CorruptIndexException e) {
   e.printStackTrace();
   logger.error("索引添加异常", e);
  } catch (LockObtainFailedException e) {
   e.printStackTrace();
   logger.error("索引添加异常", e);
  } catch (IOException e) {
   e.printStackTrace();
   logger.error("索引不存在", e);
  } catch (Exception e) {
   e.printStackTrace();
   logger.error("索引添加异常", e);
  } finally {
   if (indexWriter != null) {
    try {
     indexWriter.close();
    } catch (CorruptIndexException e) {
     e.printStackTrace();
     logger.error("索引关闭异常", e);
    } catch (IOException e) {
     e.printStackTrace();
     logger.error("索引关闭异常", e);
    }
    finally{
     try {
      if (d!=null&&IndexWriter.isLocked(d)) {    
       IndexWriter.unlock(d);  
      }
     } catch (IOException e) {
      e.printStackTrace();
      logger.error("解锁异常", e);
     }
    }
   }
  }
  return false;
 }

 /**
  * content转换为document
  *
  * @param c
  * @return
  */
 private Document getDocument(Content c) {
  Document doc = new Document();
  doc.add(new Field("id", c.getContentId(), Store.YES, Index.NOT_ANALYZED));
  doc.add(new Field("topicId", StringUtils.trimToEmpty(c.getTopic() != null ? c.getTopic().getTopicId() : ""), Store.YES, Index.NOT_ANALYZED));
  // 可将topic名称索引
  doc.add(new Field("categoryId", StringUtils.trimToEmpty(c.getCategory() != null ? c.getCategory().getCategoryId() : ""), Store.YES, Index.NOT_ANALYZED));
  // 可将category名称索引
  doc.add(new Field("contentTitle", StringUtils.trimToEmpty(c.getContentTitle()), Store.YES, Index.ANALYZED));
  doc.add(new Field("keywords", StringUtils.trimToEmpty(c.getKeywords()), Store.YES, Index.ANALYZED));
  doc.add(new Field("contentContext", HtmlUtil.getContentText(c.getContentContext()), Store.YES, Index.ANALYZED));
  doc.add(new Field("contentDescription", StringUtils.trimToEmpty(c.getContentDescription()), Store.YES, Index.ANALYZED));
  doc.add(new Field("contentAuthor", StringUtils.trimToEmpty(c.getContentAuthor()), Store.YES, Index.NOT_ANALYZED));
  if (c.getCreateTime() != null) {
   doc.add(new Field("createTime", DateTools.dateToString(c.getCreateTime(), Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
  } else {
   doc.add(new Field("createTime", DateTools.dateToString(new Date(), Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
  }
  if (c.getPublishTime() != null) {
   doc.add(new Field("publishTime", DateTools.dateToString(c.getPublishTime(), Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
  } else {
   doc.add(new Field("publishTime", DateTools.dateToString(new Date(), Resolution.SECOND), Field.Store.YES, Field.Index.NOT_ANALYZED));
  }
  doc.add(new Field("contentHits",  String.valueOf(c.getContentHits() != null ? c.getContentHits() : 0), Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field("isTop", c.getIsTop() != null &&c.getIsTop() ? "true" : "false", Field.Store.YES, Field.Index.NOT_ANALYZED));
  doc.add(new Field("contentSource", StringUtils.trimToEmpty(c.getContentSource()), Store.YES, Index.ANALYZED));
  doc.add(new Field("simpleTitle1", StringUtils.trimToEmpty(c.getSimpleTitle1()), Store.YES, Index.ANALYZED));
  doc.add(new Field("simpleTitle2", StringUtils.trimToEmpty(c.getSimpleTitle2()), Store.YES, Index.ANALYZED));
  doc.add(new Field("simpleTitle3", StringUtils.trimToEmpty(c.getSimpleTitle3()), Store.YES, Index.ANALYZED));
  return doc;
 }

 
 
 /**
  * 查询指定栏目下的文章
  *
  * @param word
  * @param category
  * @return
  * @throws InvalidTokenOffsetsException
  * @throws ParseException
  * @throws IOException
  * @throws CorruptIndexException
  */
 public List<Content> likeSearch(ContentBean content, Page<Content> page){
  try {
   BooleanQuery query = new BooleanQuery();
   IndexReader reader = IndexReader.open(FSDirectory.open(new File(FilePathSptUtil.LUCENE_INDEX)));
   IndexSearcher searcher = new IndexSearcher(reader);
   // 在索引器中使用IKSimilarity相似度评估器
   searcher.setSimilarity(new IKSimilarity());
   Query likequery = null;
   if (content.getKeyword() != null && content.getKeyword().length() > 0) {
    likequery = IKQueryParser.parseMultiField(fileds, content.getKeyword(), occurs);
   } else {
    likequery = MultiFieldQueryParser.parse(Version.LUCENE_31, "*:*", fileds, occurs, AnalyzerUtil.getIkAnalyzer());
   }
   
   query.add(likequery, Occur.MUST);
   //栏目搜索
   if(content.getCategoryId()!=null&&content.getCategoryId().length>0){
    BooleanQuery booleanquery = new BooleanQuery();
    for (String category : content.getCategoryId()) {
     Query termqquery = new TermQuery(new Term("categoryId", category));
     booleanquery.add(termqquery, Occur.SHOULD);
    }
    query.add(booleanquery, Occur.MUST);
   }
   
   if(StringUtils.isNotBlank(content.getNokeyword())){//不包含关键字
    likequery = IKQueryParser.parseMultiField(fileds, content.getNokeyword(), occurs);
    query.add(likequery,Occur.MUST_NOT);
   }
   
   //日期过滤
   if(content.getMaxPublishTime()!=null||content.getMinPublishTime()!=null){
    String start = null;
    String end = null;
    if(content.getMinPublishTime()!=null){
     start = DateTools.dateToString(content.getMinPublishTime(), Resolution.SECOND);
    }
    if(content.getMaxPublishTime()!=null){
     end = DateTools.dateToString(content.getMaxPublishTime(), Resolution.SECOND);
    }
    Query q = new TermRangeQuery("publishTime", start, end, true, true);
    query.add(q,Occur.MUST);
   }
   
   return getList(query, page, searcher);
  } catch (Exception e) {
   e.printStackTrace();
   logger.error("搜索异常", e);
   page.setResult(new ArrayList<Content>());
   return new ArrayList<Content>();
  }
 }

 /**
  * Content 转换
  *
  * @throws IOException
  * @throws CorruptIndexException
  * @throws InvalidTokenOffsetsException
  */
 private List<Content> getList(Query query, Page<Content> page, IndexSearcher searcher) throws CorruptIndexException, IOException, InvalidTokenOffsetsException {
  TopScoreDocCollector topCollector = TopScoreDocCollector.create(searcher.maxDoc(), false);
  // 多字段排序,设置在前面的会优先排序
  /*SortField[] sortFields = new SortField[3];
  //true:降序  false:升序
  SortField top = new SortField("isTop", SortField.INT, true);
  SortField hits = new SortField("contentHits", SortField.INT, true);
  SortField pubtime = new SortField("publishTime", SortField.LONG, true);
  sortFields[0] = top;
  sortFields[1] = hits;
  sortFields[2] = pubtime;
  Sort sort = new Sort(sortFields);*/
  searcher.search(query, topCollector);  
  SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");// 高亮
  Highlighter highlighter = new Highlighter(simpleHtmlFormatter, new QueryScorer(query));
  highlighter.setTextFragmenter(new SimpleFragmenter(100));
  page.setTotal(topCollector.getTotalHits());
  ScoreDoc[] docs = topCollector.topDocs((page.getPageNo() - 1) * page.getPageSize(), page.getPageSize()).scoreDocs;
  List<Content> list = new ArrayList<Content>();
  for (ScoreDoc scdoc : docs) {
   Document document = searcher.doc(scdoc.doc);
   TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), scdoc.doc, "contentContext", AnalyzerUtil.getIkAnalyzer());
   String contentContext = document.get("contentContext");
   String content = highlighter.getBestFragment(tokenStream, contentContext);
   Content c = new Content();
   c.setContentId(document.get("id"));
   Topic topic = new Topic();
   topic.setTopicId(document.get("topicId"));
   c.setTopic(topic);
   Category category = new Category();
   category.setCategoryId(document.get("categoryId"));
   c.setCategory(category);
   c.setContentTitle(document.get("contentTitle"));
   c.setKeywords(document.get("keywords"));
   if (StringUtils.isNotBlank(content)) {
    c.setContentContext(content);
   } else {
    c.setContentContext(contentContext.length() > 100 ? contentContext.substring(0, 100) : contentContext);
   }
   c.setContentDescription(document.get("contentDescription"));
   c.setContentAuthor(document.get("contentAuthor"));
   try {
    c.setCreateTime(DateTools.stringToDate(document.get("createTime")));
   } catch (java.text.ParseException e) {
    c.setCreateTime(new Date());
   }
   try {
    c.setPublishTime(DateTools.stringToDate(document.get("publishTime")));
   } catch (java.text.ParseException e) {
    c.setPublishTime(new Date());
   }
   c.setContentHits(Integer.valueOf(document.get("contentHits")));
   c.setIsTop(StringUtils.isNotBlank(document.get("isTop")) && Boolean.valueOf(document.get("isTop")));
   c.setContentSource(document.get("contentSource"));
   c.setSimpleTitle1(document.get("simpleTitle1"));
   c.setSimpleTitle2(document.get("simpleTitle2"));
   c.setSimpleTitle3(document.get("simpleTitle3"));
   
   list.add(c);
  }
  page.setResult(list);
  return list;
 }

}

 

 

 

 

  • 2
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 2
    评论
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值