1、生成索引
目的:索引数据目录,在指定目录生成索引文件
首先导入maven依赖
Maven dependencies (all version 5.3.1):

<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-core</artifactId>
  <version>5.3.1</version>
</dependency>
<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-queryparser</artifactId>
  <version>5.3.1</version>
</dependency>
<dependency>
  <groupId>org.apache.lucene</groupId>
  <artifactId>lucene-analyzers-common</artifactId>
  <version>5.3.1</version>
</dependency>

(注意:代码中使用的 SmartChineseAnalyzer 位于 lucene-analyzers-smartcn 构件中,需额外引入同版本依赖。)

生成索引的具体代码:

package com.zking.lunece;
import java.io.IOException;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.zking.dao.BlogDao;
import com.zking.util.PropertiesUtil;
/**
 * 构建Lucene索引
 * 1、构建索引
 * 2、读取索引文件,获取命中片段
 * 3、使得命中片段高亮显示
 *
 * @author LJ
 * @Date 2018年12月11日
 * @Time 下午4:47:49
 */
public class IndexStarter {private static BlogDao blogDao = new BlogDao();
public static void main(String[] args) {
//索引输出流配置对象
IndexWriterConfig conf = new IndexWriterConfig(new SmartChineseAnalyzer());
Directory d;
IndexWriter indexWriter = null;
try {
//获取索引文件存放地址对象
d = FSDirectory.open(Paths.get(PropertiesUtil.getValue(“indexPath”)));
indexWriter = new IndexWriter(d, conf);try { //为数据库中所有数据构建索引 List<Map<String, Object>> list = blogDao.list(null, null); for (Map<String, Object> map : list) { Document doc = new Document(); //Field.Store.YES是否存储到硬盘 doc.add(new StringField("id", (String) map.get("id"), Field.Store.YES)); //TextField用于对一句话分词处理 doc.add(new TextField("title", (String) map.get("title"), Field.Store.YES)); doc.add(new StringField("url", (String) map.get("url"), Field.Store.YES)); indexWriter.addDocument(doc); } } catch (InstantiationException e) { e.printStackTrace(); } catch (IllegalAccessException e) { e.printStackTrace(); } catch (SQLException e) { e.printStackTrace(); } } catch (IOException e) { e.printStackTrace(); }finally { if(indexWriter != null) { try { indexWriter.close();//关闭索引输出流 } catch (IOException e) { e.printStackTrace(); } } }
}
}
lucene.properties文件(原文此处内容缺失;根据代码,该文件至少需要一行 indexPath=索引文件存放目录):

properties工具类:
package com.zking.util;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
/**
 * properties工具类
 * @author user
 */
public class PropertiesUtil {
    /**
     * Looks up a value from /lucene.properties on the classpath.
     *
     * @param key property name to look up
     * @return the property value, or null when the key is absent or the
     *         properties file cannot be found/read
     */
    public static String getValue(String key) {
        Properties prop = new Properties();
        // FIX: use the class literal instead of instantiating a throwaway
        // PropertiesUtil, and try-with-resources so the stream is closed
        // (the original leaked the InputStream).
        try (InputStream in = PropertiesUtil.class.getResourceAsStream("/lucene.properties")) {
            if (in == null) {
                // FIX: the original passed null to prop.load() and threw an NPE
                // when the resource was missing; treat it like a missing key.
                return null;
            }
            prop.load(in);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return prop.getProperty(key);
    }
}
2、使用索引(进行分页)
从索引文件中拿数据
(1)获取输入流(通过dirReader)
(2)获取索引搜索对象(通过输入流来拿)
(3)获取查询对象(通过查询解析器来获取,解析器是通过分词器获取)
(4)获取包含关键字排前面的文档对象集合
(5)可以获取对应文档的内容
分页目前找到2种解决方案,推荐用第1种,撑住大数据情况
第1种:用searchAfter()方法,先生成一个ScoreDoc后用searchAfter方法(我先贴一下两者的区别部分)
第2种:比如查20-30条的记录,先查出前30条,后面for循环从i = 20开始就行
第1种的所有代码:
package com.zking.web;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.struts2.ServletActionContext;
import com.zking.dao.BlogDao;
import com.zking.util.PageBean;
import com.zking.util.PropertiesUtil;
import com.zking.util.StringUtils;
/**
 * Blog list action. Without a search title the database is queried directly;
 * with a title the Lucene index is searched using searchAfter-based paging
 * and the matched keywords are highlighted in the title.
 */
public class BlogAction {
    private String title;
    private BlogDao blogDao = new BlogDao();

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the "blogList" result view name; sets "blogList" and "pageBean"
     *         request attributes as a side effect
     */
    public String execute() {
        IndexReader indexReader = null;
        try {
            HttpServletRequest request = ServletActionContext.getRequest();
            PageBean pageBean = new PageBean();
            pageBean.setRequest(request);
            if (StringUtils.isBlank(title)) {
                // No keyword: plain database paging.
                List<Map<String, Object>> blogList = this.blogDao.list(title, pageBean);
                request.setAttribute("blogList", blogList);
            } else {
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                // The reader is not constructed with "new" -- it is opened from
                // the index directory; closed in the finally block (the
                // original leaked the reader's file handles).
                indexReader = DirectoryReader
                        .open(FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath"))));
                IndexSearcher searcher = new IndexSearcher(indexReader);
                // Match the search sentence against the indexed "title" terms.
                Query query = new QueryParser("title", analyzer).parse(title);
                ScoreDoc sd = null;
                if (pageBean.getPage() > 1) {
                    // Fetch every hit up to the start of this page; the last
                    // doc of the previous page becomes the searchAfter anchor.
                    TopDocs td = searcher.search(query, pageBean.getStartIndex());
                    // FIX: guard against fewer hits than the requested offset,
                    // which made the original throw ArrayIndexOutOfBoundsException.
                    if (td.scoreDocs.length >= pageBean.getStartIndex()) {
                        sd = td.scoreDocs[pageBean.getStartIndex() - 1];
                    }
                }
                // FIX: page size comes from the PageBean (was hard-coded to 10).
                TopDocs topDocs = searcher.searchAfter(sd, query, pageBean.getRows());
                // Highlight the matched keywords in red bold.
                QueryScorer queryScorer = new QueryScorer(query);
                Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'><b>", "</b></span>");
                Highlighter highlighter = new Highlighter(formatter, queryScorer);
                List<Map<String, Object>> blogList = new ArrayList<>();
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                pageBean.setTotal(topDocs.totalHits);
                for (ScoreDoc scoreDoc : scoreDocs) {
                    Map<String, Object> map = new HashMap<>();
                    Document doc = searcher.doc(scoreDoc.doc);
                    map.put("id", doc.get("id"));
                    String highlighterTitle = doc.get("title");
                    if (StringUtils.isNotBlank(highlighterTitle)) {
                        // getBestFragment returns null when nothing matches in
                        // this particular title; the raw title is lost then --
                        // NOTE(review): consider falling back to the original.
                        highlighterTitle = highlighter.getBestFragment(analyzer, "title", highlighterTitle);
                    }
                    map.put("title", highlighterTitle);
                    map.put("url", doc.get("url"));
                    blogList.add(map);
                }
                request.setAttribute("blogList", blogList);
            }
            request.setAttribute("pageBean", pageBean);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the index reader's file handles.
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return "blogList";
    }
}
第2种的所有代码:
package com.zking.web;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.struts2.ServletActionContext;
import com.zking.dao.BlogDao;
import com.zking.util.PageBean;
import com.zking.util.PropertiesUtil;
import com.zking.util.StringUtils;
/**
 * Blog list action (variant 2). Without a search title the database is
 * queried directly; with a title the Lucene index is searched by fetching
 * all hits up to the end of the requested page and slicing out this page.
 */
public class BlogAction {
    private String title;
    private BlogDao blogDao = new BlogDao();

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the "blogList" result view name; sets "blogList" and "pageBean"
     *         request attributes as a side effect
     */
    public String execute() {
        IndexReader indexReader = null;
        try {
            HttpServletRequest request = ServletActionContext.getRequest();
            PageBean pageBean = new PageBean();
            pageBean.setRequest(request);
            if (StringUtils.isBlank(title)) {
                // No keyword: plain database paging.
                List<Map<String, Object>> blogList = this.blogDao.list(title, pageBean);
                request.setAttribute("blogList", blogList);
            } else {
                SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
                // FIX: the reader is now closed in the finally block; the
                // original leaked its file handles on every search.
                indexReader = DirectoryReader
                        .open(FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath"))));
                IndexSearcher searcher = new IndexSearcher(indexReader);
                // Match the search sentence against the indexed "title" terms.
                Query query = new QueryParser("title", analyzer).parse(title);
                // Fetch every hit up to the end of the requested page, then
                // keep only this page's slice below.
                TopDocs topDocs = searcher.search(query, pageBean.getPage() * pageBean.getRows());
                // Highlight the matched keywords in red bold.
                QueryScorer queryScorer = new QueryScorer(query);
                Formatter formatter = new SimpleHTMLFormatter("<span style='color:red'><b>", "</b></span>");
                Highlighter highlighter = new Highlighter(formatter, queryScorer);
                List<Map<String, Object>> blogList = new ArrayList<>();
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                pageBean.setTotal(topDocs.totalHits);
                // Slice [startIndex, end) out of the fetched hits.
                int end = Math.min(pageBean.getStartIndex() + pageBean.getRows(), scoreDocs.length);
                for (int i = pageBean.getStartIndex(); i < end; i++) {
                    Map<String, Object> map = new HashMap<>();
                    Document doc = indexReader.document(scoreDocs[i].doc);
                    map.put("id", doc.get("id"));
                    String highlighterTitle = doc.get("title");
                    if (StringUtils.isNotBlank(highlighterTitle)) {
                        highlighterTitle = highlighter.getBestFragment(analyzer, "title", highlighterTitle);
                    }
                    map.put("title", highlighterTitle);
                    map.put("url", doc.get("url"));
                    blogList.add(map);
                }
                request.setAttribute("blogList", blogList);
            }
            request.setAttribute("pageBean", pageBean);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the index reader's file handles.
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        return "blogList";
    }
}
pageBean帮助类:
package com.zking.util;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
/**
 * 分页工具类
 */
public class PageBean {
    private int page = 1; // current page number (1-based)
    private int rows = 10; // page size
    private int total = 0; // total record count
    private boolean pagination = true; // whether paging is enabled
    // Parameters of the previous query, kept for rebuilding page links.
    private Map<String, String[]> paramMap;
    // URL of the previous query.
    private String url;

    public PageBean() {
        super();
    }

    /**
     * Initializes the bean from the request parameters "page", "rows" and
     * "pagination", and remembers the request URL and parameter map.
     */
    public void setRequest(HttpServletRequest request) {
        this.setPage(request.getParameter("page"));
        this.setRows(request.getParameter("rows"));
        this.setPagination(request.getParameter("pagination"));
        this.setUrl(request.getRequestURL().toString());
        this.setParamMap(request.getParameterMap());
    }

    public Map<String, String[]> getParamMap() {
        return paramMap;
    }

    public void setParamMap(Map<String, String[]> paramMap) {
        this.paramMap = paramMap;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public int getPage() {
        return page;
    }

    public void setPage(int page) {
        this.page = page;
    }

    /** Parses the page number; blank or malformed input keeps the default. */
    public void setPage(String page) {
        if (StringUtils.isNotBlank(page)) {
            try {
                this.page = Integer.parseInt(page);
            } catch (NumberFormatException e) {
                // FIX: a malformed query parameter no longer crashes the request.
            }
        }
    }

    public int getRows() {
        return rows;
    }

    /** Int overload, added for symmetry with setPage(int). */
    public void setRows(int rows) {
        this.rows = rows;
    }

    /** Parses the page size; blank or malformed input keeps the default. */
    public void setRows(String rows) {
        if (StringUtils.isNotBlank(rows)) {
            try {
                this.rows = Integer.parseInt(rows);
            } catch (NumberFormatException e) {
                // FIX: a malformed query parameter no longer crashes the request.
            }
        }
    }

    public int getTotal() {
        return total;
    }

    public void setTotal(int total) {
        this.total = total;
    }

    /** Parses the total count; blank or malformed input keeps the current value. */
    public void setTotal(String total) {
        if (StringUtils.isNotBlank(total)) {
            try {
                this.total = Integer.parseInt(total);
            } catch (NumberFormatException e) {
                // FIX: a malformed value no longer crashes the request.
            }
        }
    }

    public boolean isPagination() {
        return pagination;
    }

    public void setPagination(boolean pagination) {
        this.pagination = pagination;
    }

    /** Only an explicit "false" disables pagination; any other value keeps it on. */
    public void setPagination(String pagination) {
        // "false" is never blank, so the original isNotBlank check was redundant.
        if ("false".equals(pagination)) {
            this.pagination = false;
        }
    }

    /**
     * @return the last page number; 0 when there are no records or when rows
     *         is non-positive (which previously divided by zero)
     */
    public int getMaxPage() {
        if (this.rows <= 0) {
            return 0;
        }
        int max = this.total / this.rows;
        if (this.total % this.rows != 0) {
            max++;
        }
        return max;
    }

    /** @return the next page number, clamped to the last page */
    public int getNextPage() {
        int nextPage = this.page + 1;
        if (nextPage > this.getMaxPage()) {
            nextPage = this.getMaxPage();
        }
        return nextPage;
    }

    /** @return the previous page number, clamped to 1 */
    public int getPreviousPage() {
        int previousPage = this.page - 1;
        if (previousPage < 1) {
            previousPage = 1;
        }
        return previousPage;
    }

    /** @return zero-based index of the first record on the current page */
    public int getStartIndex() {
        return (this.page - 1) * this.rows;
    }

    @Override
    public String toString() {
        return "PageBean [page=" + page + ", rows=" + rows + ", total=" + total + ", pagination=" + pagination + "]";
    }
}
运行在搜索框输入“使用方法”后的效果: