Lucene搜索引擎入门以及分页

1、生成索引
目的:索引数据目录,在指定目录生成索引文件

首先导入maven依赖

<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-core</artifactId>
  <version>5.3.1</version>
</dependency>
<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-queryparser</artifactId>
  <version>5.3.1</version>
</dependency>
<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-analyzers-common</artifactId>
  <version>5.3.1</version>
</dependency>

生成索引的具体代码:

package com.zking.lunece;

import java.io.IOException;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.zking.dao.BlogDao;
import com.zking.util.PropertiesUtil;

/**

  • 构建Lucene索引

  • 1、构建索引

  • 2、读取索引文件,获取命中片段

  • 3、使得命中片段高亮显示

  • @author LJ

  • @Date 2018年12月11日

  • @Time 下午4:47:49
    */
    public class IndexStarter {

    private static BlogDao blogDao = new BlogDao();

    public static void main(String[] args) {
    //索引输出流配置对象
    IndexWriterConfig conf = new IndexWriterConfig(new SmartChineseAnalyzer());
    Directory d;
    IndexWriter indexWriter = null;
    try {
    //获取索引文件存放地址对象
    d = FSDirectory.open(Paths.get(PropertiesUtil.getValue(“indexPath”)));
    indexWriter = new IndexWriter(d, conf);

     	try {
     		//为数据库中所有数据构建索引
     		List<Map<String, Object>> list = blogDao.list(null, null);
     		for (Map<String, Object> map : list) {
     			Document doc = new Document();
     			//Field.Store.YES是否存储到硬盘
     			doc.add(new StringField("id", (String) map.get("id"), Field.Store.YES));
     			//TextField用于对一句话分词处理
     			doc.add(new TextField("title", (String) map.get("title"), Field.Store.YES));
     			doc.add(new StringField("url", (String) map.get("url"), Field.Store.YES));
     			
     			indexWriter.addDocument(doc);
     		}
     	} catch (InstantiationException e) {
     		e.printStackTrace();
     	} catch (IllegalAccessException e) {
     		e.printStackTrace();
     	} catch (SQLException e) {
     		e.printStackTrace();
     	}
     } catch (IOException e) {
     	e.printStackTrace();
     }finally {
     	if(indexWriter != null) {
     		try {
     			indexWriter.close();//关闭索引输出流
     		} catch (IOException e) {
     			e.printStackTrace();
     		}
     	}
     }
    

    }
    }
lucene.properties文件(至少需要包含 indexPath 属性,指向索引文件的存放目录,例如 indexPath=D:/lucene/index):

properties工具类:

package com.zking.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**

  • properties工具类
  • @author user

*/
public class PropertiesUtil {

    /**
     * Looks up a value from {@code /lucene.properties} on the classpath.
     *
     * @param key property key to look up
     * @return the value for {@code key}, or {@code null} when the key is
     *         absent, the properties file is missing from the classpath,
     *         or the file cannot be read
     */
    public static String getValue(String key) {
        Properties prop = new Properties();
        // Load via the Class object directly - no need to instantiate the
        // utility class. try-with-resources closes the stream (the original
        // leaked it).
        try (InputStream in = PropertiesUtil.class.getResourceAsStream("/lucene.properties")) {
            if (in == null) {
                // Resource missing: the original would have thrown an NPE from
                // Properties.load(null); report and return null instead.
                System.err.println("lucene.properties not found on classpath");
                return null;
            }
            prop.load(in);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return prop.getProperty(key);
    }

}
2、使用索引(进行分页)
从索引文件中拿数据

(1)获取输入流(通过dirReader)

(2)获取索引搜索对象(通过输入流来拿)

(3)获取查询对象(通过查询解析器来获取,解析器是通过分词器获取)

(4)获取包含关键字排前面的文档对象集合

(5)可以获取对应文档的内容

分页目前找到2种解决方案,推荐用第1种,它在大数据量的情况下也能支撑(不需要一次取回前面所有页的数据再丢弃)

第1种:用searchAfter()方法,先生成一个ScoreDoc后用searchAfter方法(我先贴一下两者的区别部分)

第2种:比如查20-30条的记录,先查出前30条,后面for循环从i = 20开始就行

第1种的所有代码:

package com.zking.web;

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.struts2.ServletActionContext;

import com.zking.dao.BlogDao;
import com.zking.util.PageBean;
import com.zking.util.PropertiesUtil;
import com.zking.util.StringUtils;

public class BlogAction {
    private String title;
    private BlogDao blogDao = new BlogDao();

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Lists blogs. Without a search title the database is paged directly;
     * with a title the Lucene index is searched page-by-page via
     * {@link IndexSearcher#searchAfter}, which scales to deep pages.
     *
     * @return the "blogList" result name
     */
    public String execute() {
        try {
            HttpServletRequest request = ServletActionContext.getRequest();
            PageBean pageBean = new PageBean();
            pageBean.setRequest(request);

            if (StringUtils.isBlank(title)) {
                // No keyword: plain database paging.
                List<Map<String, Object>> blogList = this.blogDao.list(title, pageBean);
                request.setAttribute("blogList", blogList);
            } else {
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                // The reader is obtained from the index directory, not constructed directly.
                IndexReader indexReader = DirectoryReader
                        .open(FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath"))));
                try {
                    IndexSearcher searcher = new IndexSearcher(indexReader);
                    // Parse the phrase against the "title" field with the same
                    // analyzer that was used to build the index.
                    Query query = new QueryParser("title", analyzer).parse(title);

                    // Anchor for searchAfter: the last hit of the previous page,
                    // or null on the first page.
                    ScoreDoc sd;
                    if (pageBean.getPage() == 1) {
                        sd = null;
                    } else {
                        // Fetch every hit up to the start of this page; only the
                        // final one (previous page's last hit) is kept as the anchor.
                        TopDocs td = searcher.search(query, pageBean.getStartIndex());
                        sd = td.scoreDocs[pageBean.getStartIndex() - 1];
                    }

                    // One page of hits after the anchor. Page size comes from
                    // PageBean - the original hard-coded 10, ignoring the
                    // "rows" request parameter.
                    TopDocs topDocs = searcher.searchAfter(sd, query, pageBean.getRows());

                    // Highlight matched keywords in red/bold in the title.
                    QueryScorer queryScorer = new QueryScorer(query);
                    Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'><b>", "</b></span>");
                    Highlighter highlighter = new Highlighter(formatter, queryScorer);

                    List<Map<String, Object>> blogList = new ArrayList<>();
                    pageBean.setTotal(topDocs.totalHits);
                    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                        Map<String, Object> map = new HashMap<>();
                        Document doc = searcher.doc(scoreDoc.doc);

                        map.put("id", doc.get("id"));

                        String highlighterTitle = doc.get("title");
                        if (StringUtils.isNotBlank(highlighterTitle)) {
                            highlighterTitle = highlighter.getBestFragment(analyzer, "title", highlighterTitle);
                        }
                        map.put("title", highlighterTitle);
                        map.put("url", doc.get("url"));
                        blogList.add(map);
                    }
                    request.setAttribute("blogList", blogList);
                } finally {
                    // The original never closed the reader, leaking file handles
                    // on every search request.
                    indexReader.close();
                }
            }
            request.setAttribute("pageBean", pageBean);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "blogList";
    }

}
第2种的所有代码:

package com.zking.web;

import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.servlet.http.HttpServletRequest;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.struts2.ServletActionContext;

import com.zking.dao.BlogDao;
import com.zking.util.PageBean;
import com.zking.util.PropertiesUtil;
import com.zking.util.StringUtils;

public class BlogAction {
    private String title;
    private BlogDao blogDao = new BlogDao();

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * Lists blogs. Without a search title the database is paged directly;
     * with a title the Lucene index is over-fetched up to the end of the
     * current page and only this page's slice is returned (simple, but
     * fetches page*rows hits - fine for small result sets).
     *
     * @return the "blogList" result name
     */
    public String execute() {
        try {
            HttpServletRequest request = ServletActionContext.getRequest();
            PageBean pageBean = new PageBean();
            pageBean.setRequest(request);

            if (StringUtils.isBlank(title)) {
                // No keyword: plain database paging.
                List<Map<String, Object>> blogList = this.blogDao.list(title, pageBean);
                request.setAttribute("blogList", blogList);
            } else {
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                IndexReader indexReader = DirectoryReader
                        .open(FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath"))));
                try {
                    IndexSearcher searcher = new IndexSearcher(indexReader);
                    // Parse the phrase against the "title" field with the same
                    // analyzer used at indexing time.
                    Query query = new QueryParser("title", analyzer).parse(title);
                    // Over-fetch: retrieve every hit up to the end of the current
                    // page, then keep only this page's slice below.
                    TopDocs topDocs = searcher.search(query, pageBean.getPage() * pageBean.getRows());

                    // Highlight matched keywords in red/bold in the title.
                    QueryScorer queryScorer = new QueryScorer(query);
                    Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'><b>", "</b></span>");
                    Highlighter highlighter = new Highlighter(formatter, queryScorer);

                    List<Map<String, Object>> blogList = new ArrayList<>();
                    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                    pageBean.setTotal(topDocs.totalHits);

                    // Slice out just the current page; Math.min guards the last,
                    // possibly partial, page.
                    int end = Math.min(pageBean.getStartIndex() + pageBean.getRows(), scoreDocs.length);
                    for (int i = pageBean.getStartIndex(); i < end; i++) {
                        Map<String, Object> map = new HashMap<>();
                        Document doc = indexReader.document(scoreDocs[i].doc);

                        map.put("id", doc.get("id"));

                        String highlighterTitle = doc.get("title");
                        if (StringUtils.isNotBlank(highlighterTitle)) {
                            highlighterTitle = highlighter.getBestFragment(analyzer, "title", highlighterTitle);
                        }
                        map.put("title", highlighterTitle);
                        map.put("url", doc.get("url"));
                        blogList.add(map);
                    }
                    request.setAttribute("blogList", blogList);
                } finally {
                    // The original never closed the reader, leaking file handles
                    // on every search request.
                    indexReader.close();
                }
            }
            request.setAttribute("pageBean", pageBean);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return "blogList";
    }

}
pageBean帮助类:

package com.zking.util;

import java.util.Map;

import javax.servlet.http.HttpServletRequest;

/**

  • 分页工具类

*/
public class PageBean {

    private int page = 1;       // current page number (1-based)

    private int rows = 10;      // page size

    private int total = 0;      // total record count

    private boolean pagination = true; // whether paging is enabled

    // parameters of the previous query, kept so links can reproduce it
    private Map<String, String[]> paramMap;
    // URL of the previous query
    private String url;

    /**
     * Populates this bean from the request's "page", "rows" and "pagination"
     * parameters, and remembers the request URL and parameter map.
     *
     * @param request the current HTTP request
     */
    public void setRequest(HttpServletRequest request) {
        String page = request.getParameter("page");
        String rows = request.getParameter("rows");
        String pagination = request.getParameter("pagination");
        this.setPage(page);
        this.setRows(rows);
        this.setPagination(pagination);
        this.setUrl(request.getRequestURL().toString());
        this.setParamMap(request.getParameterMap());
    }

    public PageBean() {
        super();
    }

    public Map<String, String[]> getParamMap() {
        return paramMap;
    }

    public void setParamMap(Map<String, String[]> paramMap) {
        this.paramMap = paramMap;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public int getPage() {
        return page;
    }

    public void setPage(int page) {
        this.page = page;
    }

    /**
     * Parses the page number from a request parameter. Malformed or
     * non-positive values are ignored (the original threw an uncaught
     * NumberFormatException on garbage input, and a negative page made
     * {@link #getStartIndex()} negative).
     */
    public void setPage(String page) {
        if (StringUtils.isNotBlank(page)) {
            try {
                int parsed = Integer.parseInt(page);
                if (parsed > 0) {
                    this.page = parsed;
                }
            } catch (NumberFormatException e) {
                // keep the default page on bad input
            }
        }
    }

    public int getRows() {
        return rows;
    }

    /**
     * Parses the page size from a request parameter. Malformed or
     * non-positive values are ignored - a zero page size would cause a
     * divide-by-zero in {@link #getMaxPage()}.
     */
    public void setRows(String rows) {
        if (StringUtils.isNotBlank(rows)) {
            try {
                int parsed = Integer.parseInt(rows);
                if (parsed > 0) {
                    this.rows = parsed;
                }
            } catch (NumberFormatException e) {
                // keep the default page size on bad input
            }
        }
    }

    public int getTotal() {
        return total;
    }

    public void setTotal(int total) {
        this.total = total;
    }

    /** Parses the total from a string; malformed values are ignored. */
    public void setTotal(String total) {
        if (StringUtils.isNotBlank(total)) {
            try {
                this.total = Integer.parseInt(total);
            } catch (NumberFormatException e) {
                // keep the previous total on bad input
            }
        }
    }

    public boolean isPagination() {
        return pagination;
    }

    public void setPagination(boolean pagination) {
        this.pagination = pagination;
    }

    /** Paging stays on unless the parameter is exactly "false". */
    public void setPagination(String pagination) {
        if ("false".equals(pagination)) {
            this.pagination = false;
        }
    }

    /**
     * Last page number: total/rows, rounded up.
     *
     * @return the maximum page (0 when there are no records)
     */
    public int getMaxPage() {
        int max = this.total / this.rows;
        if (this.total % this.rows != 0) {
            max++;
        }
        return max;
    }

    /**
     * Next page number, clamped to the last page.
     *
     * @return page + 1, or the max page when already on it
     */
    public int getNextPage() {
        int nextPage = this.page + 1;
        if (nextPage > this.getMaxPage()) {
            nextPage = this.getMaxPage();
        }
        return nextPage;
    }

    /**
     * Previous page number, clamped to 1.
     *
     * @return page - 1, or 1 when already on the first page
     */
    public int getPreviousPage() {
        int previousPage = this.page - 1;
        if (previousPage < 1) {
            previousPage = 1;
        }
        return previousPage;
    }

    /**
     * Zero-based index of the first record on the current page.
     *
     * @return (page - 1) * rows
     */
    public int getStartIndex() {
        return (this.page - 1) * this.rows;
    }

    @Override
    public String toString() {
        return "PageBean [page=" + page + ", rows=" + rows + ", total=" + total + ", pagination=" + pagination + "]";
    }

}
运行在搜索框输入“使用方法”后的效果:

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值