Lucene索引实现并高亮显示（包含显示失败的处理）

最新推荐文章于 2021-02-12 12:40:30 发布

DeepLoveDeep

最新推荐文章于 2021-02-12 12:40:30 发布

阅读量497

点赞数 1

文章标签： lucene 高亮显示全文检索

本文链接：https://blog.csdn.net/baidu_41773019/article/details/108337238

版权

使用的 Jar 版本：Lucene 5.5.4。所需的 Maven 依赖或 jar 包可到本人的文档中下载。

import com.rjcloud.api.entity.CoordinateEntity;
import com.rjcloud.api.entity.LuenceSearchEntity;
import com.rjcloud.common.dal.PageData;
import com.rjcloud.common.util.StringUtils;
import com.rjcloud.sysweb.util.JdbcUtil;
import org.apache.commons.configuration.CompositeConfiguration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.StringReader;
import java.nio.file.FileSystems;
import java.sql.ResultSet;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;


/**
 * Database full-text search demo built on Lucene 5.5.4.
 *
 * <p>{@link #creatIndex()} rebuilds the index from the rows returned by the SQL statement
 * configured in {@code luence_db.properties}. {@link #search(String, int, int)} queries the
 * {@code name} and {@code content} fields and returns one page of hits in which the matched
 * text is wrapped in a highlighting {@code <span>}.
 *
 * <p>Typical use: {@code new DbSearch().creatIndex();} followed by
 * {@code new DbSearch().search("keyword", 1, 10);}.
 *
 * @author Shencq
 * @date 2020-07-09
 */
public class DbSearch {
    /** Index directory; refreshed from the properties file by every call. */
    public static String INDEX_PATH = null;
    /** JDBC connection URL; loaded by {@link #creatIndex()}. */
    public static String JDBC_URL = null;
    /** Database user; loaded by {@link #creatIndex()}. */
    public static String USER = null;
    /** Database password; loaded by {@link #creatIndex()}. */
    public static String PWD = null;

    private static final String CONFIG_FILE = "luence_db.properties";
    private static final String HIGHLIGHT_PREFIX = "<span style='background-color:red;'>";
    private static final String HIGHLIGHT_SUFFIX = "</span>";
    /** Upper bound on the number of hits fetched from the index per search. */
    private static final int MAX_HITS = 10000;

    /**
     * Rebuilds the whole index from the database.
     *
     * <p>Reads {@code INDEX_PATH}, {@code JDBC_URL}, {@code USER}, {@code PWD} and {@code SQL}
     * from the properties file, clears the existing index and adds one document per result row
     * with the fields {@code id}, {@code name} and {@code content}. Empty {@code name} or
     * {@code content} values are not indexed.
     *
     * <p>Any failure is printed and swallowed. In that case the pending changes (including the
     * initial delete-all) are rolled back, so a failed rebuild does not replace the previous
     * index with an empty or partial one.
     */
    public void creatIndex() {
        Directory directory = null;
        Analyzer analyzer = null;
        IndexWriter indexWriter = null;
        JdbcUtil jdbc = null;
        boolean indexed = false;
        try {
            CompositeConfiguration config = loadConfig();
            INDEX_PATH = config.getString("INDEX_PATH");
            if (StringUtils.isEmpty(INDEX_PATH)) {
                throw new IllegalStateException("INDEX_PATH is not configured in " + CONFIG_FILE);
            }
            File indexDir = new File(INDEX_PATH);
            if (!indexDir.exists()) {
                // mkdirs() also creates missing parent directories; mkdir() would fail on a nested path.
                indexDir.mkdirs();
            }
            JDBC_URL = config.getString("JDBC_URL");
            USER = config.getString("USER");
            PWD = config.getString("PWD");
            String sql = config.getString("SQL");

            directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_PATH));
            analyzer = new IKAnalyzer();
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            // Drop the previous index content; only becomes visible once committed below.
            indexWriter.deleteAll();

            // NOTE(review): "orcal" is passed through unchanged; confirm it is the identifier JdbcUtil expects.
            jdbc = new JdbcUtil("orcal", JDBC_URL, USER, PWD);
            ResultSet rs = jdbc.query(sql);
            while (rs.next()) {
                Document document = new Document();
                // StringField.TYPE_STORED: indexed as a single token and stored.
                document.add(new Field("id", rs.getString("id"), StringField.TYPE_STORED));
                // TextField.TYPE_STORED: indexed, tokenized and stored.
                String name = rs.getString("name");
                if (!StringUtils.isEmpty(name)) {
                    document.add(new Field("name", name, TextField.TYPE_STORED));
                }
                String content = rs.getString("content");
                if (!StringUtils.isEmpty(content)) {
                    document.add(new Field("content", content, TextField.TYPE_STORED));
                }
                indexWriter.addDocument(document);
            }
            indexWriter.commit();
            indexed = true;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (jdbc != null) {
                try {
                    jdbc.closeAll();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            if (indexWriter != null) {
                try {
                    if (indexed) {
                        indexWriter.close();
                    } else {
                        // close() would commit the delete-all and any partial batch; rollback() discards them.
                        indexWriter.rollback();
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            closeQuietly(analyzer);
            closeQuietly(directory);
        }
    }

    /**
     * Searches the {@code name} and {@code content} fields and returns one page of results.
     *
     * <p>Each returned entity carries the stored {@code id} plus the highlighted {@code name}
     * and {@code content}. When the Lucene highlighter produces no fragment for a field, every
     * literal occurrence of {@code keyWord} in the stored value is wrapped in the highlight
     * span instead. A field that is not stored for a document is reported as {@code null}.
     *
     * @param keyWord   query text, parsed with the Lucene query syntax
     * @param pageIndex 1-based page number
     * @param pageSize  number of results per page, must be positive
     * @return the requested page, or {@code null} when the index path is not configured, the
     *         arguments are invalid, or the search fails
     */
    public PageData search(String keyWord, int pageIndex, int pageSize) {
        Directory directory = null;
        DirectoryReader directoryReader = null;
        Analyzer analyzer = null;
        try {
            INDEX_PATH = loadConfig().getString("INDEX_PATH");
            if (StringUtils.isEmpty(INDEX_PATH)) {
                System.out.println("索引库创建失败");
                return null;
            }
            // Reject arguments that previously only failed through a caught runtime exception.
            if (keyWord == null || pageIndex < 1 || pageSize < 1) {
                return null;
            }

            directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_PATH));
            directoryReader = DirectoryReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
            // The query side constructs IK with the flag set to true; the indexer uses the no-arg form.
            analyzer = new IKAnalyzer(true);

            String[] fields = {"name", "content"};
            // SHOULD on both fields: a hit in either field matches (MUST = and, MUST_NOT = not).
            BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
            Query query = MultiFieldQueryParser.parse(keyWord, fields, clauses, analyzer);
            if (query == null) {
                System.out.println("无检索到数据...");
                return null;
            }

            TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
            System.out.println("Luence查找共找到匹配处：" + topDocs.totalHits);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            System.out.println("Luence查找共找到匹配文档数：" + scoreDocs.length);

            QueryScorer scorer = new QueryScorer(query);
            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(HIGHLIGHT_PREFIX, HIGHLIGHT_SUFFIX);
            Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
            highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));

            int start = (pageIndex - 1) * pageSize;
            int end = Math.min(pageSize * pageIndex, scoreDocs.length);
            List<LuenceSearchEntity> list = new ArrayList<>();
            for (int i = start; i < end; i++) {
                Document document = indexSearcher.doc(scoreDocs[i].doc);
                LuenceSearchEntity entity = new LuenceSearchEntity();
                entity.setIds(document.get("id"));
                entity.setName(highlight(highlighter, analyzer, "name", document.get("name"), keyWord));
                entity.setContent(highlight(highlighter, analyzer, "content", document.get("content"), keyWord));
                list.add(entity);
            }

            PageData<LuenceSearchEntity> page = new PageData<>();
            page.setData(list);
            page.setStartIndex(start);
            page.setCount(scoreDocs.length);
            // Ceiling division; an empty result still reports one page, as it did before.
            page.setTotalPage(Math.max(1, (scoreDocs.length + pageSize - 1) / pageSize));
            // NOTE(review): totalSize receives the page size, not the hit count - kept as-is, confirm with consumers.
            page.setTotalSize(pageSize);
            return page;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // No return here: a failure while closing must not discard a computed result.
            closeQuietly(directoryReader);
            closeQuietly(analyzer);
            closeQuietly(directory);
        }
    }

    /** Loads the properties file that holds the index path, JDBC settings and SQL. */
    private static CompositeConfiguration loadConfig() throws Exception {
        CompositeConfiguration config = new CompositeConfiguration();
        config.addConfiguration(new PropertiesConfiguration(CONFIG_FILE));
        return config;
    }

    /**
     * Highlights one stored field value: the highlighter's best fragment when there is one,
     * otherwise the full text with literal occurrences of the keyword wrapped in the span.
     * Returns {@code null} when the field has no stored value.
     */
    private static String highlight(Highlighter highlighter, Analyzer analyzer, String field,
                                    String text, String keyWord) throws Exception {
        if (text == null) {
            return null;
        }
        TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
        String bestFragment = highlighter.getBestFragment(tokenStream, text);
        if (!StringUtils.isEmpty(bestFragment)) {
            return bestFragment;
        }
        if (keyWord.isEmpty()) {
            return text;
        }
        // replace() matches literally; replaceAll() would interpret the user's keyword as a regex.
        return text.replace(keyWord, HIGHLIGHT_PREFIX + keyWord + HIGHLIGHT_SUFFIX);
    }

    /** Closes a resource if present, printing instead of propagating any failure. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

//定时任务创建索引文件

/**
 * Job that creates the Lucene index files by delegating to {@code DbSearch.creatIndex()}.
 *
 * Created by lenovo on 2020/4/10.
 */
public class LuenceIndexDataJob implements Job{

    /**
     * Runs one index build; the execution context is not used.
     */
    @Override
    public void execute(JobExecutionContext context) throws JobExecutionException {
        new DbSearch().creatIndex();
    }
}

//分页查询

/**
 * Paged search endpoint: reads {@code page} (default 1), {@code pagesize} (default 10) and
 * {@code content} from the request, runs the search and writes the result as JSON.
 */
@RequestMapping(value = "/v1")
public void v1(HttpServletRequest request, HttpServletResponse response) throws Exception{
        // "page" is a page number, not a row offset.
        int pageNo = ParamUtil.getInteger(request, "page", 1);
        int pageSize = ParamUtil.getInteger(request, "pagesize", 10);
        String keyword = ParamUtil.getString(request, "content");

        PageData result = new DbSearch().search(keyword, pageNo, pageSize);

        // Set the content type before obtaining the writer.
        response.setHeader("Content-type", "application/json;charset=UTF-8");
        response.getWriter().print(JSONArray.toJSONStringWithDateFormat(result, "yyyy-MM-dd HH:mm:ss"));
}

//PageData类

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * Serializable container for one page of results plus paging and status metadata.
 *
 * <p>{@link #getData()} never returns {@code null}: a missing list is replaced by an empty
 * one, whether it arrives through a constructor or through {@link #setData(List)}.
 *
 * @param <T> element type of the page
 */
public class PageData<T> implements Serializable {
    private int totalSize;
    private List<T> data = new ArrayList<>();
    private int startIndex;
    private int totalPage;
    private int code;
    private String msg;
    private int count;

    /** Creates an empty page with all counters at zero. */
    public PageData() {
    }

    /**
     * @param totalSize value reported by {@link #getTotalSize()}
     * @param data      page content; {@code null} is treated as an empty list
     */
    public PageData(int totalSize, List<T> data) {
        this.totalSize = totalSize;
        setData(data);
    }

    /**
     * @param totalSize  value reported by both {@link #getTotalSize()} and {@link #getCount()}
     * @param data       page content; {@code null} is treated as an empty list
     * @param startIndex value reported by {@link #getStartIndex()}
     * @param totalPage  value reported by {@link #getTotalPage()}
     */
    public PageData(int totalSize, List<T> data, int startIndex, int totalPage) {
        this.totalSize = totalSize;
        this.count = totalSize;
        this.totalPage = totalPage;
        this.startIndex = startIndex;
        setData(data);
    }

    public int getTotalSize() {
        return this.totalSize;
    }

    public void setTotalSize(int totalSize) {
        this.totalSize = totalSize;
    }

    public int getTotalPage() {
        return totalPage;
    }

    public void setTotalPage(int totalPage) {
        this.totalPage = totalPage;
    }

    public int getStartIndex() {
        return this.startIndex;
    }

    public void setStartIndex(int startIndex) {
        this.startIndex = startIndex;
    }

    /** @return the page content, never {@code null} */
    public List<T> getData() {
        return this.data;
    }

    /**
     * Replaces the page content.
     *
     * @param data new content; {@code null} is stored as a new empty list
     */
    public final void setData(List<T> data) {
        this.data = (data != null) ? data : new ArrayList<T>();
    }

    public int getCode() {
        return code;
    }

    public void setCode(int code) {
        this.code = code;
    }

    public String getMsg() {
        return msg;
    }

    public void setMsg(String msg) {
        this.msg = msg;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }
}

//pom文件

<!-- Lucene 5.5.4: core, query parser and highlighter, all at the same version -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>5.5.4</version>
</dependency>
<!-- IK Analyzer 2012_FF, resolved from the jar checked in under ${basedir}/lib -->
<dependency>
    <groupId>IKAnalyzer2012_FF</groupId>
    <artifactId>IKAnalyzer2012_FF</artifactId>
    <version>IKAnalyzer2012_FF</version>
    <scope>system</scope>
    <systemPath>${basedir}/lib/IKAnalyzer2012_FF.jar</systemPath>
</dependency>
<!-- Official artifact instead of a system-scoped local jar, so it resolves like the other Lucene modules -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>5.5.4</version>
</dependency>
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>5.5.4</version>
</dependency>

DeepLoveDeep

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
Lucene索引实现并高亮显示（包含显示失败的处理）

版本Jar:5.5.4版本----------------------需要的maven或jar去本人文档下载import com.rjcloud.api.entity.CoordinateEntity;import com.rjcloud.api.entity.LuenceSearchEntity;import com.rjcloud.common.dal.PageData;import com.rjcloud.common.util.StringUtils;import com.rjcl
复制链接

扫一扫