lucene搜索

1.什么是Lucene
Lucene是一个开放源代码的全文检索引擎工具包,提供了简单而强大的应用程序接口(API),能够实现全文索引和搜索。
2.依赖

<!-- Maven version properties, one per Lucene module; each is referenced as ${...} by the matching dependency. All are pinned to 4.7.2. -->
<lucene-analyzers-common.version>4.7.2</lucene-analyzers-common.version>
<lucene-core.version>4.7.2</lucene-core.version>
<lucene-facet.version>4.7.2</lucene-facet.version>
<lucene-highlighter.version>4.7.2</lucene-highlighter.version>
<lucene-queries.version>4.7.2</lucene-queries.version>
<lucene-queryparser.version>4.7.2</lucene-queryparser.version>

<!-- lucene begin: Apache Lucene modules; each version is resolved from the *.version property of the same name -->
<dependency>
	<groupId>org.apache.lucene</groupId>
	<artifactId>lucene-analyzers-common</artifactId>
	<version>${lucene-analyzers-common.version}</version>
</dependency>
<dependency>
	<groupId>org.apache.lucene</groupId>
	<artifactId>lucene-core</artifactId>
	<version>${lucene-core.version}</version>
</dependency>
<dependency>
	<groupId>org.apache.lucene</groupId>
	<artifactId>lucene-facet</artifactId>
	<version>${lucene-facet.version}</version>
</dependency>
<dependency>
	<groupId>org.apache.lucene</groupId>
	<artifactId>lucene-highlighter</artifactId>
	<version>${lucene-highlighter.version}</version>
</dependency>
<dependency>
	<groupId>org.apache.lucene</groupId>
	<artifactId>lucene-queries</artifactId>
	<version>${lucene-queries.version}</version>
</dependency>
<dependency>
	<groupId>org.apache.lucene</groupId>
	<artifactId>lucene-queryparser</artifactId>
	<version>${lucene-queryparser.version}</version>
</dependency>
<!-- lucene end -->

<!-- IK Analyzer: Chinese word-segmentation analyzer for Lucene -->
<dependency>
	<groupId>com.janeluo</groupId>
	<artifactId>ikanalyzer</artifactId>
	<version>2012_u6</version>
	<!-- Keep IK Analyzer from pulling in its own Lucene artifacts; presumably so that only the explicitly declared 4.7.2 modules end up on the classpath (confirm against the resolved dependency tree). -->
	<exclusions>
		<exclusion>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
		</exclusion>
		<exclusion>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
		</exclusion>
		<exclusion>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analyzers-common</artifactId>
		</exclusion>
	</exclusions>
</dependency>

3.使用demo

package com.tbtx.imaijia.controller;

import com.tbtx.imaijia.biz.ArticleBiz;
import com.tbtx.imaijia.domain.bo.ArticleBO;
import com.tbtx.imaijia.domain.query.ContentArticleQuery;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.beans.factory.annotation.Autowired;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Stand-alone demo of Lucene 4.7 with the IK analyzer: clears an index on
 * local disk, refills it with generated sample articles and runs one keyword
 * query against it, printing the hits to standard output.
 *
 * @author chengxn
 * @date 2019/10/29
 */
public class LuceneTest {

    /**
     * Indexes 999 generated sample articles (ids 1..999, identical text).
     *
     * @throws Exception if the index cannot be opened or written
     */
    public static void index() throws Exception {
        List<ArticleBO> articles = new ArrayList<>();
        for (int i = 1; i < 1000; i++) {
            ArticleBO bo = new ArticleBO();
            bo.setId(Long.valueOf(i));
            bo.setTitle("标题");
            bo.setSummary("摘要");
            bo.setContent("内容是我呀互联网技术更新速度高级管理员产品技术");
            articles.add(bo);
        }

        // Build every document before opening the writer so a conversion
        // failure never touches the index.
        List<Document> docList = new ArrayList<>();
        for (ArticleBO article : articles) {
            docList.add(toDocument(article));
        }

        // try-with-resources guarantees the writer (and its write lock) is
        // released even when addDocument fails; close() also commits.
        try (IndexWriter writer = getIndexWriter()) {
            for (Document doc : docList) {
                writer.addDocument(doc);
            }
        }
    }

    /**
     * Converts one article into a Lucene document. All fields are stored.
     * {@code id} is a numeric field; {@code title}, {@code summary} and
     * {@code content} are tokenized by the analyzer; {@code author},
     * {@code tag} and {@code image} are indexed as single untokenized terms.
     * Null strings are indexed as the empty string.
     */
    private static Document toDocument(ArticleBO article) {
        Document document = new Document();
        document.add(new LongField("id", article.getId(), Field.Store.YES));
        document.add(new TextField("title", nullToEmpty(article.getTitle()), Field.Store.YES));
        document.add(new TextField("summary", nullToEmpty(article.getSummary()), Field.Store.YES));
        document.add(new TextField("content", nullToEmpty(article.getContent()), Field.Store.YES));
        document.add(new StringField("author", nullToEmpty(article.getAuthor()), Field.Store.YES));
        document.add(new StringField("tag", nullToEmpty(article.getTag()), Field.Store.YES));
        document.add(new StringField("image", nullToEmpty(article.getImage()), Field.Store.YES));
        return document;
    }

    /** Returns {@code value}, or the empty string when it is null. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /**
     * Opens an {@link IndexWriter} on the demo index directory using the IK
     * analyzer. The caller is responsible for closing the returned writer.
     *
     * @return a new writer
     * @throws Exception if the directory or the writer cannot be opened
     */
    public static IndexWriter getIndexWriter() throws Exception {
        // IK analyzer (Chinese word segmentation), not Lucene's StandardAnalyzer.
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_47, analyzer);

        File indexFile = new File(getIndexPath());
        Directory directory = FSDirectory.open(indexFile);
        return new IndexWriter(directory, cfg);
    }

    /**
     * Returns the path of the index directory, creating the directory (and
     * any missing parents) if it does not exist yet.
     */
    private static String getIndexPath() {
        String path = "/Users/tbtx/Desktop/Lucene索引库/cxn";

        File file = new File(path);
        if (!file.exists()) {
            file.mkdirs();
        }
        return path;
    }

    /**
     * Removes every document from the index and commits. Failures are printed
     * to standard error rather than propagated.
     */
    public static void deleteAll() {
        IndexWriter writer = null;
        try {
            writer = getIndexWriter();
            writer.deleteAll();

            int cnt = writer.numDocs();
            System.out.println("索引条数\t" + cnt);

            writer.commit();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeWriter(writer);
        }
    }

    /** Closes {@code writer} if it is non-null; an I/O failure is printed, not thrown. */
    private static void closeWriter(IndexWriter writer) {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs {@code query} against the index and prints the total hit count
     * followed by id, title, summary and content of up to 100 hits. Failures
     * are printed to standard error rather than propagated.
     *
     * @param query the query to execute
     */
    public static void excQuery(Query query) {
        IndexReader reader = null;
        try {
            reader = getIndexReader();
            IndexSearcher searcher = new IndexSearcher(reader);

            TopDocs topDocs = searcher.search(query, 100);
            int totalSize = topDocs.totalHits;
            System.out.println("总数" + totalSize + "条");
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = reader.document(scoreDoc.doc);
                System.out.println(doc.get("id") + ":" + doc.get("title") + ":" + doc.get("summary") + ":" + doc.get("content"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeReader(reader);
        }
    }

    /**
     * Opens a reader on the demo index directory. The caller is responsible
     * for closing it.
     *
     * @return a new reader
     * @throws Exception if the index cannot be opened
     */
    public static IndexReader getIndexReader() throws Exception {
        String path = getIndexPath();
        FSDirectory fs = FSDirectory.open(new File(path));
        return DirectoryReader.open(fs);
    }

    /** Closes {@code reader} if it is non-null; an I/O failure is printed, not thrown. */
    public static void closeReader(IndexReader reader) {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: clear the index, rebuild it, then search the
     * {@code title} and {@code content} fields for one phrase.
     */
    public static void main(String[] args) throws Exception {
        deleteAll();
        index();

        // The query-time analyzer must match the one used at index time,
        // otherwise query terms will not line up with the indexed terms.
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47, new String[]{"title", "content"}, new IKAnalyzer());
        // The argument is Lucene query syntax; boolean operators such as
        // AND / OR must be written in upper case.
        Query query = parser.parse("互联网技术");
        excQuery(query);
    }

}

4.在实际项目中使用时,通常还会有分页及高亮显示的需求,示例如下

package com.tbtx.imaijia.biz.impl;

import com.tbtx.imaijia.biz.LuceneBiz;
import com.tbtx.imaijia.domain.bo.ArticleBO;
import com.tbtx.imaijia.domain.query.SearchQuery;
import com.tbtx.utils.model.PageResult;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;


/**
 * Lucene-backed article search: builds and rebuilds the on-disk index and
 * serves paged keyword queries with highlighted title and summary.
 *
 * @author chengxn
 * @date 2019/10/30
 */
@Service
public class LuceneBizImpl implements LuceneBiz {

    /** Fields searched by {@link #excQuery(SearchQuery)}. */
    private static final String[] SEARCH_FIELDS = {"title", "summary", "content", "tag"};

    /** Directory holding the index, injected from configuration. */
    @Value("${lucene.search.index.path}")
    private String indexPath;

    /**
     * Appends the given articles to the index.
     *
     * @param articleBOList articles to index
     * @throws Exception if the index cannot be opened or written
     */
    @Override
    public void addIndex(List<ArticleBO> articleBOList) throws Exception {
        // Convert first so a bad article fails before the index is touched.
        List<Document> docList = toDocuments(articleBOList);

        // try-with-resources releases the writer (and its write lock) even
        // when addDocument fails; close() also commits.
        try (IndexWriter writer = getIndexWriter()) {
            for (Document doc : docList) {
                writer.addDocument(doc);
            }
        }
    }

    /**
     * Replaces the whole index content with the given articles. The delete
     * and the inserts go through one writer and a single commit, so searchers
     * see either the old content or the new content, never an empty index;
     * on failure the pending changes are rolled back.
     *
     * @param articleBOList the complete new content of the index
     * @throws Exception if the index cannot be opened or written
     */
    @Override
    public void updateIndex(List<ArticleBO> articleBOList) throws Exception {
        List<Document> docList = toDocuments(articleBOList);

        IndexWriter writer = getIndexWriter();
        boolean committed = false;
        try {
            writer.deleteAll();
            for (Document doc : docList) {
                writer.addDocument(doc);
            }
            writer.commit();
            committed = true;
        } finally {
            if (committed) {
                writer.close();
            } else {
                // rollback() discards everything since the last commit and
                // closes the writer.
                try {
                    writer.rollback();
                } catch (IOException rollbackFailure) {
                    rollbackFailure.printStackTrace();
                }
            }
        }
    }

    /**
     * Runs a keyword search over title, summary, content and tag and returns
     * the requested page. Title and summary carry HTML highlight markup where
     * the query matched. If reading the index fails the error is printed and
     * the page is returned as constructed, without total or data set.
     *
     * @param searchQuery keyword plus 1-based page number and page size
     * @return the requested page of hits
     * @throws ParseException if the keyword is not valid Lucene query syntax
     */
    @Override
    public PageResult<ArticleBO> excQuery(SearchQuery searchQuery) throws ParseException {
        PageResult<ArticleBO> resultPage = new PageResult<>(new ArrayList<ArticleBO>());
        List<ArticleBO> articleBOList = new ArrayList<>();

        // The query-time analyzer must match the one used at index time.
        Analyzer analyzer = new IKAnalyzer();
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47, SEARCH_FIELDS, analyzer);
        Query query = parser.parse(searchQuery.getKeyWord());

        int pageNum = searchQuery.getPageNum();
        int pageSize = searchQuery.getPageSize();
        if (pageNum < 1 || pageSize < 1) {
            // The searcher rejects a non-positive hit count; there is no such page.
            return resultPage;
        }

        IndexReader reader = null;
        try {
            reader = getIndexReader();
            IndexSearcher indexSearcher = new IndexSearcher(reader);

            // Fetch the top pageNum * pageSize hits and keep only the last page.
            TopDocs topDocs = indexSearcher.search(query, pageNum * pageSize);
            ScoreDoc[] scores = topDocs.scoreDocs;
            int start = (pageNum - 1) * pageSize;
            // scoreDocs never holds more than the requested number of hits and
            // may hold fewer, so its length is the only safe upper bound.
            int end = scores.length;

            // Wrap matched terms in bold red markup.
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b><font color=red>", "</font></b>");
            QueryScorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

            for (int i = start; i < end; i++) {
                Document doc = reader.document(scores[i].doc);
                articleBOList.add(toArticle(doc, analyzer, highlighter));
            }
            resultPage.setTotal(Long.valueOf(topDocs.totalHits));
            resultPage.setDatas(articleBOList);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeReader(reader);
        }
        return resultPage;
    }

    /** Converts every article in the list to a Lucene document, preserving order. */
    private static List<Document> toDocuments(List<ArticleBO> articles) {
        List<Document> docList = new ArrayList<>();
        for (ArticleBO article : articles) {
            docList.add(toDocument(article));
        }
        return docList;
    }

    /**
     * Converts one article into a Lucene document. Null strings are indexed
     * as the empty string; a null publish time is simply left out.
     */
    private static Document toDocument(ArticleBO article) {
        Document document = new Document();
        // Numeric id, stored.
        document.add(new LongField("id", article.getId(), Field.Store.YES));
        // Tokenized by the analyzer and stored.
        document.add(new TextField("title", nullToEmpty(article.getTitle()), Field.Store.YES));
        document.add(new TextField("summary", nullToEmpty(article.getSummary()), Field.Store.YES));
        // Body text: tokenized but NOT stored, so it is searchable yet cannot be read back.
        document.add(new TextField("content", nullToEmpty(article.getContent()), Field.Store.NO));
        document.add(new TextField("author", nullToEmpty(article.getAuthor()), Field.Store.YES));
        document.add(new TextField("tag", nullToEmpty(article.getTag()), Field.Store.YES));
        // Indexed as a single untokenized term and stored.
        document.add(new StringField("image", nullToEmpty(article.getImage()), Field.Store.YES));
        document.add(new StringField("columnName", nullToEmpty(article.getColumnName()), Field.Store.YES));
        // Publish time as a second-resolution DateTools string, untokenized and stored.
        if (article.getPublishTime() != null) {
            String publishTime = DateTools.dateToString(article.getPublishTime(), DateTools.Resolution.SECOND);
            document.add(new StringField("publishTime", publishTime, Field.Store.YES));
        }
        return document;
    }

    /** Rebuilds an article from the stored fields of a hit, highlighting title and summary. */
    private static ArticleBO toArticle(Document doc, Analyzer analyzer, Highlighter highlighter)
            throws IOException, InvalidTokenOffsetsException, java.text.ParseException {
        ArticleBO articleBO = new ArticleBO();
        articleBO.setId(Long.parseLong(doc.get("id")));
        articleBO.setAuthor(doc.get("author"));
        articleBO.setTag(doc.get("tag"));
        articleBO.setImage(doc.get("image"));
        articleBO.setColumnName(doc.get("columnName"));
        String publishTime = doc.get("publishTime");
        if (publishTime != null) {
            articleBO.setPublishTime(DateTools.stringToDate(publishTime));
        }
        articleBO.setTitle(highlight(highlighter, analyzer, "title", doc.get("title")));
        articleBO.setSummary(highlight(highlighter, analyzer, "summary", doc.get("summary")));
        return articleBO;
    }

    /**
     * Returns the best highlighted fragment of {@code text}, or {@code text}
     * itself when it is null or contains no match.
     */
    private static String highlight(Highlighter highlighter, Analyzer analyzer, String field, String text)
            throws IOException, InvalidTokenOffsetsException {
        if (text == null) {
            return null;
        }
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
        String fragment = highlighter.getBestFragment(tokenStream, text);
        return fragment == null ? text : fragment;
    }

    /** Returns {@code value}, or the empty string when it is null. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

    /**
     * Opens an {@link IndexWriter} on the configured index directory using
     * the IK analyzer. The caller must close (or roll back) the writer.
     */
    private IndexWriter getIndexWriter() throws Exception {
        // IK analyzer (Chinese word segmentation), not Lucene's StandardAnalyzer.
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        File indexFile = new File(getIndexPath());
        Directory directory = FSDirectory.open(indexFile);
        return new IndexWriter(directory, cfg);
    }

    /** Returns the configured index path, creating the directory if it does not exist. */
    private String getIndexPath() {
        String path = indexPath;
        File file = new File(path);
        if (!file.exists()) {
            file.mkdirs();
        }
        return path;
    }

    /** Opens a reader on the configured index directory; the caller must close it. */
    private IndexReader getIndexReader() throws Exception {
        String path = getIndexPath();
        FSDirectory fs = FSDirectory.open(new File(path));
        return DirectoryReader.open(fs);
    }

    /** Closes {@code reader} if it is non-null; an I/O failure is printed, not thrown. */
    private void closeReader(IndexReader reader) {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
系统根据B/S,即所谓的电脑浏览器/网络服务器方式,运用Java技术性,挑选MySQL作为后台系统。系统主要包含对客服聊天管理、字典表管理、公告信息管理、金融工具管理、金融工具收藏管理、金融工具银行卡管理、借款管理、理财产品管理、理财产品收藏管理、理财产品银行卡管理、理财银行卡信息管理、银行卡管理、存款管理、银行卡记录管理、取款管理、转账管理、用户管理、员工管理等功能模块。 文中重点介绍了银行管理的专业技术发展背景和发展状况,随后遵照软件传统式研发流程,最先挑选适用思维和语言软件开发平台,依据需求分析报告模块和设计数据库结构,再根据系统功能模块的设计制作系统功能模块图、流程表和E-R图。随后设计架构以及编写代码,并实现系统能模块。最终基本完成系统检测和功能测试。结果显示,该系统能够实现所需要的作用,工作状态没有明显缺陷。 系统登录功能是程序必不可少的功能,在登录页面必填的数据有两项,一项就是账号,另一项数据就是密码,当管理员正确填写并提交这二者数据之后,管理员就可以进入系统后台功能操作区。进入银行卡列表,管理员可以进行查看列表、模糊搜索以及相关维护等操作。用户进入系统可以查看公告和模糊搜索公告信息、也可以进行公告维护操作。理财产品管理页面,管理员可以进行查看列表、模糊搜索以及相关维护等操作。产品类型管理页面,此页面提供给管理员的功能有:新增产品类型,修改产品类型,删除产品类型。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值