只用了几百行代码写的百度搜索引擎,你看咋样?

public class LuceneIndexUtil {

private static String INDEX_PATH = “/opt/lucene/demo”;

private static IndexWriter writer;

public static LuceneIndexUtil getInstance() {

return SingletonHolder.luceneUtil;

}

private static class SingletonHolder {

public final static LuceneIndexUtil luceneUtil = new LuceneIndexUtil();

}

private LuceneIndexUtil() {

this.initLuceneUtil();

}

private void initLuceneUtil() {

try {

Directory dir = FSDirectory.open(Paths.get(INDEX_PATH));

Analyzer analyzer = new StandardAnalyzer();

IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

writer = new IndexWriter(dir, iwc);

} catch (IOException e) {

log.error(“create luceneUtil error”);

if (null != writer) {

try {

writer.close();

} catch (IOException ioException) {

ioException.printStackTrace();

} finally {

writer = null;

}

}

}

}

/**


  • 引单个文档

  • @param doc 文档信息

  • @throws IOException IO 异常

*/

public void addDoc(Document doc) throws IOException {

if (null != doc) {

writer.addDocument(doc);

writer.commit();

writer.close();

}

}

/**

  • 索引单个实体

  • @param model 单个实体

  • @throws IOException IO 异常

*/

public void addModelDoc(Object model) throws IOException {

Document document = new Document();

List fields = luceneField(model.getClass());

fields.forEach(document::add);

writer.addDocument(document);

writer.commit();

writer.close();

}

/**

  • 索引实体列表

  • @param objects 实例列表

  • @throws IOException IO 异常

*/

public void addModelDocs(List<?> objects) throws IOException {

if (CollectionUtils.isNotEmpty(objects)) {

List docs = new ArrayList<>();

objects.forEach(o -> {

Document document = new Document();

List fields = luceneField(o);

fields.forEach(document::add);

docs.add(document);

});

writer.addDocuments(docs);

}

}

/**

  • 清除所有文档

  • @throws IOException IO 异常

*/

public void delAllDocs() throws IOException {

writer.deleteAll();

}

/**

  • 索引文档列表

  • @param docs 文档列表

  • @throws IOException IO 异常

*/

public void addDocs(List docs) throws IOException {

if (CollectionUtils.isNotEmpty(docs)) {

long startTime = System.currentTimeMillis();

writer.addDocuments(docs);

writer.commit();

log.info(“共索引{}个 Document,共耗时{} 毫秒”, docs.size(), (System.currentTimeMillis() - startTime));

} else {

log.warn(“索引列表为空”);

}

}

/**

  • 根据实体 class 对象获取字段类型,进行 lucene Field 字段映射

  • @param modelObj 实体 modelObj 对象

  • @return 字段映射列表

*/

public List luceneField(Object modelObj) {

Map<String, Object> classFields = ReflectionUtils.getClassFields(modelObj.getClass());

Map<String, Object> classFieldsValues = ReflectionUtils.getClassFieldsValues(modelObj);

List fields = new ArrayList<>();

for (String key : classFields.keySet()) {

Field field;

String dataType = StringUtils.substringAfterLast(classFields.get(key).toString(), “.”);

switch (dataType) {

case “Integer”:

field = new IntPoint(key, (Integer) classFieldsValues.get(key));

break;

case “Long”:

field = new LongPoint(key, (Long) classFieldsValues.get(key));

break;

case “Float”:

field = new FloatPoint(key, (Float) classFieldsValues.get(key));

break;

case “Double”:

field = new DoublePoint(key, (Double) classFieldsValues.get(key));

break;

case “String”:

String string = (String) classFieldsValues.get(key);

if (StringUtils.isNotBlank(string)) {

if (string.length() <= 1024) {

field = new StringField(key, (String) classFieldsValues.get(key), Field.Store.YES);

} else {

field = new TextField(key, (String) classFieldsValues.get(key), Field.Store.NO);

}

} else {

field = new StringField(key, StringUtils.EMPTY, Field.Store.NO);

}

break;

default:

field = new TextField(key, JsonUtils.obj2Json(classFieldsValues.get(key)), Field.Store.YES);

break;

}

fields.add(field);

}

return fields;

}

public void close() {

if (null != writer) {

try {

writer.close();

} catch (IOException e) {

log.error(“close writer error”);

}

writer = null;

}

}

public void commit() throws IOException {

if (null != writer) {

writer.commit();

writer.close();

}

}

}

有了工具类,我们再写一个 Demo 来进行数据的索引:

import java.util.ArrayList;

import java.util.List;

/**


  • Function:

  • Author:@author Silence

  • Date:2020-10-17 21:08

  • Desc:

*/

public class Demo {

public static void main(String[] args) {

LuceneIndexUtil luceneUtil = LuceneIndexUtil.getInstance();

List articles = new ArrayList<>();

try {

//索引数据

ArticleModel article1 = new ArticleModel();

article1.setTitle(“Java天下第一”);

article1.setAuthor(“粉丝”);

article1.setContent(“这是一篇给大家介绍 Lucene 的技术文章,必定点赞评论转发!!!”);

ArticleModel article2 = new ArticleModel();

article2.setTitle(“天下第一”);

article2.setAuthor(“粉丝”);

article2.setContent(“此处省略两千字…”);

ArticleModel article3 = new ArticleModel();

article3.setTitle(“Java天下第一”);

article3.setAuthor(“粉丝”);

article3.setContent(“Today is big day!”);

articles.add(article1);

articles.add(article2);

articles.add(article3);

luceneUtil.addModelDocs(articles);

luceneUtil.commit();

} catch (Exception e) {

e.printStackTrace();

}

}

}

上面的 content 内容可以自行进行替换,小编这边避免凑字数的嫌疑就不贴了。

展示


运行结束后,我们通过 Lucene 的可视化工具 Luke 来查看索引的数据内容。下载并解压后可以看到 .bat 和 .sh 两个脚本,根据自己的系统选择对应的脚本运行即可。小编这边是 Mac,用的是 .sh 脚本,运行后打开前面设置的索引目录即可。

进入之后,我们可以看到下图显示的内容。选择 content 并点击 show top items,可以在右侧看到索引数据。分词器不同,索引的结果也会不同;小编这里采用的是标准分词器,小伙伴们可以根据自己的需求选择合适的分词器。

搜索数据


数据已经索引成功了,接下来我们就需要根据条件进行数据的搜索了,我们创建一个 LuceneSearchUtil.java 来操作数据。

import org.apache.commons.collections.MapUtils;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.index.DirectoryReader;

import org.apache.lucene.queryparser.classic.QueryParser;

import org.apache.lucene.search.*;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.springframework.beans.factory.annotation.Value;

import java.io.IOException;

import java.nio.file.Paths;

import java.util.Map;

public class LuceneSearchUtil {

private static String INDEX_PATH = “/opt/lucene/demo”;

private static IndexSearcher searcher;

public static LuceneSearchUtil getInstance() {

return LuceneSearchUtil.SingletonHolder.searchUtil;

}

private static class SingletonHolder {

public final static LuceneSearchUtil searchUtil = new LuceneSearchUtil();

}

private LuceneSearchUtil() {

this.initSearcher();

}

private void initSearcher() {

Directory directory;

try {

directory = FSDirectory.open(Paths.get(INDEX_PATH));

DirectoryReader reader = DirectoryReader.open(directory);

searcher = new IndexSearcher(reader);

} catch (IOException e) {

e.printStackTrace();

}

}

public TopDocs searchByMap(Map<String, Object> queryMap) throws Exception {

if (null == searcher) {

this.initSearcher();

}

if (MapUtils.isNotEmpty(queryMap)) {

BooleanQuery.Builder builder = new BooleanQuery.Builder();

queryMap.forEach((key, value) -> {

if (value instanceof String) {

Query queryString = new PhraseQuery(key, (String) value);

// Query queryString = new TermQuery(new Term(key, (String) value));

builder.add(queryString, BooleanClause.Occur.MUST);

}

});

return searcher.search(builder.build(), 10);

}

return null;

}

}

在 Demo.java 中增加搜索代码如下:

// Query the index. searchByMap matches each value as one exact, un-analyzed term, so the value
// must equal the indexed title character for character (the sample titles contain no space).
Map<String, Object> map = new HashMap<>();
map.put("title", "Java天下第一");

// Alternative queries to try:
// map.put("title", "天下第一");
// map.put("content", "最");

LuceneSearchUtil searchUtil = LuceneSearchUtil.getInstance();
TopDocs topDocs = searchUtil.searchByMap(map);
// Print the total number of matching documents.
System.out.println(topDocs.totalHits);

  • 18
    点赞
  • 16
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值