博客系统前台提供搜索功能,希望不仅仅是通过文章标题、摘要来模糊搜索,而是可以跟文章内容进行匹配。由于文章内容在数据库中使用blob字段存储,无法使用数据库进行模糊查询。于是决定采用Lucene全文检索技术。
pom.xml依赖
使用ikanalyzer分词器,能很好的对中文进行分词并建立索引。
<!-- https://mvnrepository.com/artifact/com.janeluo/ikanalyzer -->
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
Lucene建立索引
查询文章信息,并对文章标题、作者、摘要、内容、主键建立索引,供查询使用
/**
 * Rebuilds (first run) or incrementally updates (later runs) the Lucene
 * full-text index over all articles: TITLE, AUTHOR, ABSTRACT, CONTENT are
 * indexed for search; ARTICLEID is indexed as an exact-match key.
 *
 * @param path filesystem directory where the index files are stored
 * @throws IOException if the index cannot be written
 */
public void genIndex(String path) throws IOException {
    long start = System.currentTimeMillis();
    // Generation counter: the first run bootstraps a fresh index with
    // addDocument; later runs replace documents in place via updateDocument.
    createCount++;
    File indexDir = new File(path);
    if (createCount == 1) {
        // BUGFIX: deleteOnExit() only schedules deletion at JVM shutdown and
        // cannot remove a non-empty directory, so stale index files were never
        // actually cleared. Delete the old index files immediately instead.
        File[] staleFiles = indexDir.listFiles();
        if (staleFiles != null) {
            for (File staleFile : staleFiles) {
                staleFile.delete();
            }
        }
        indexDir.mkdirs();
    }
    // Chinese-aware tokenizer; must match the analyzer used at query time.
    Analyzer analyzer = new IKAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    List<Map<String, Object>> articleList = this.genData();
    // BUGFIX: try-with-resources closes both writer and directory even when
    // indexing fails (previously the Directory leaked if the IndexWriter
    // constructor threw, since only the writer was closed in finally).
    try (Directory directory = FSDirectory.open(indexDir);
         IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) {
        for (Map<String, Object> article : articleList) {
            Document doc = new Document();
            doc.add(new TextField("TITLE", (String) article.get("TITLE"), Field.Store.YES));
            doc.add(new TextField("AUTHOR", (String) article.get("AUTHOR"), Field.Store.YES));
            doc.add(new TextField("ABSTRACT", (String) article.get("ABSTRACT"), Field.Store.YES));
            doc.add(new TextField("CONTENT", (String) article.get("CONTENT"), Field.Store.YES));
            // BUGFIX: the primary key must be indexed un-analyzed (StringField)
            // so the Term used by updateDocument matches the indexed token
            // exactly; an analyzed TextField may tokenize the id differently.
            doc.add(new StringField("ARTICLEID", "" + article.get("ARTICLEID"), Field.Store.YES));
            if (createCount == 1) {
                indexWriter.addDocument(doc);
            } else {
                // Replace the previously indexed version of this article.
                Term term = new Term("ARTICLEID", "" + article.get("ARTICLEID"));
                indexWriter.updateDocument(term, doc);
            }
        }
        // BUGFIX: commit once after the loop instead of once per document;
        // a per-document commit forces an fsync each iteration and made
        // index generation needlessly slow.
        indexWriter.commit();
        logger.info("==【GenIndex】第" + createCount + "次更新索引,条 数:"
                + articleList.size());
        long costTime = System.currentTimeMillis() - start;
        logger.info("==【GenIndex】更新索引用时:" + costTime + " ms");
    } catch (Exception e) {
        logger.error("【GeneratorIndex】建立索引失败" + e);
        throw e;
    }
}
Lucene查询
搜索标题、摘要、作者、内容中是否包含用户输入的内容,如果包含则将文章主键对应的文章信息返回前台展示。
/**
 * Searches the article index for the given query text across TITLE, AUTHOR,
 * ABSTRACT and CONTENT, then loads the matching articles (by id) through
 * the article service for display.
 *
 * @param queryContent user-entered search text (parsed as a Lucene query)
 * @param path filesystem directory where the index files are stored
 * @return article page data keyed for the front end
 * @throws Exception if query parsing, index access or article loading fails
 */
public Map<String, Object> searchArticle(String queryContent, String path) throws Exception {
    logger.info("【SearchIndex.searchArticle】检索文章:" + queryContent);
    try {
        // Must be the same analyzer used at index time, or terms won't match.
        Analyzer analyzer = new IKAnalyzer();
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_47,
                new String[] {"TITLE", "AUTHOR", "ABSTRACT", "CONTENT"}, analyzer);
        Query query = queryParser.parse(queryContent);
        // BUGFIX: the reader and directory were never closed, leaking a file
        // handle on every search; try-with-resources releases them.
        try (Directory directory = FSDirectory.open(new File(path));
             IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            // Cap results at 100 hits.
            TopDocs topDocs = indexSearcher.search(query, 100);
            logger.info("总共查询到" + topDocs.totalHits + "个文档");
            // Collect the matching article ids as a comma-separated string
            // (StringBuilder avoids O(n^2) string concatenation).
            StringBuilder articleIds = new StringBuilder();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc);
                articleIds.append(document.get("ARTICLEID")).append(",");
            }
            // "0" is a sentinel id meaning "no matches" for the service query.
            String idList = topDocs.totalHits == 0 ? "0" : articleIds.toString();
            Map<String, Object> params = new HashMap<>();
            params.put("ARTICLEID", idList);
            return articleManagerServiceImpl.getArticlePage(params);
        }
    } catch (Exception e) {
        logger.error("【SearchIndex.searchArticle】检索文章出错:" + e);
        throw e;
    }
}
定时任务
上述代码已经完成索引的建立以及查找,但还存在问题:何时建立索引?文章更新后索引如何更新?这里我采用quartz定时任务配置一个定时器,每5分钟更新一次索引,即重新查询数据库中文章信息,重新建立索引。
-- 文章信息索引建立定时器
delete from QUARTZ_JOB where JOBID = '101';
insert into QUARTZ_JOB (JOBID, JBOGROUP, JOBTYPE, TASK, SCHEDULE, VALIDATED, MEMO)
values ('101', 'blog', 'JAVA', 'cn.muchen.blog.lucene.LeceneSearchQuartz', '0 */5 * * * ?', '1', null);
commit;
/**
 * Quartz job that periodically regenerates the Lucene article index so that
 * search results stay in sync with the database (scheduled via the
 * QUARTZ_JOB table, every 5 minutes).
 *
 * @author 柯雷
 * @date 2018年11月27日 下午2:44:20
 */
public class LeceneSearchQuartz implements Job {
    /** Logger for this job. */
    private static final Logger logger = LoggerFactory.getLogger(LeceneSearchQuartz.class);
    /** Spring Boot environment, resolved from the application context. */
    private Environment environment = (Environment) SpringUtil.getBean("environment");
    /** Index generator bean that rebuilds the article index. */
    GeneratorIndex generatorIndex = (GeneratorIndex) SpringUtil.getBean("generatorIndex");

    @Override
    public void execute(JobExecutionContext context) throws JobExecutionException {
        JobKey jobKey = context.getJobDetail().getKey();
        String jobName = jobKey.getName();
        // Log start time of this run.
        logger.info("【LeceneSearchQuartz】执行定时器:" + jobName + ";开始时间" + Util.getDqrq("yyyy-MM-dd HH:mm:ss"));
        try {
            // Rebuild the index at the location configured by "lucene.path".
            generatorIndex.genIndex(environment.getProperty("lucene.path"));
            // Log end time on success.
            logger.info("【LeceneSearchQuartz】执行定时器:" + jobName + "成功;结束时间" + Util.getDqrq("yyyy-MM-dd HH:mm:ss"));
        } catch (IOException e) {
            // Failure is logged but not rethrown so the trigger keeps firing.
            logger.error("【LeceneSearchQuartz】执行定时器:" + jobName + "失败;失败原因:" + e);
        }
    }
}
前台检索
/**
 * Front-end endpoint: full-text search over articles.
 * Delegates to the Lucene search service and wraps the result (or an error
 * marker) in a JSON response for the page.
 *
 * @param SEARCHECONTENT user-entered search text
 * @return JSON string containing the article page plus a code/message pair
 */
@RequestMapping("/searchArticle")
@ResponseBody
public String searchArticle(@RequestParam String SEARCHECONTENT) {
    logger.info("【ArticleDetailController.searchArticle】检索文章:" + SEARCHECONTENT);
    Map<String, Object> response = new HashMap<>();
    try {
        // Run the search against the index configured by "lucene.path".
        response = searchIndex.searchArticle(SEARCHECONTENT, environment.getProperty("lucene.path"));
        if (Util.isEmpty(response)) {
            // Nothing matched: replace with a fresh map carrying the failure code.
            response = new HashMap<>();
            response.put("code", Constants.PAGE_QUERY_SB);
            response.put("message", "未查询到结果");
        } else {
            // Matches found: attach the success code to the result payload.
            response.put("code", Constants.PAGE_QUERY_CG);
            response.put("message", "查询成功");
        }
    } catch (Exception e) {
        logger.error("【ArticleDetailController.searchArticle】检索文章失败:" + e);
        response.put("code", Constants.PAGE_QUERY_SB);
        response.put("message", "查询文章失败");
    }
    return JSONObject.toJSONString(response);
}