只用了几百行代码写的百度搜索引擎，你看咋样？

最新推荐文章于 2024-07-13 13:55:40 发布

2401_85763952

最新推荐文章于 2024-07-13 13:55:40 发布

阅读量947

点赞数 18

文章标签：百度搜索引擎 java

本文链接：https://blog.csdn.net/2401_85763952/article/details/139787473

版权

public class LuceneIndexUtil {

private static String INDEX_PATH = “/opt/lucene/demo”;

private static IndexWriter writer;

public static LuceneIndexUtil getInstance() {

return SingletonHolder.luceneUtil;

}

private static class SingletonHolder {

public final static LuceneIndexUtil luceneUtil = new LuceneIndexUtil();

}

private LuceneIndexUtil() {

this.initLuceneUtil();

}

private void initLuceneUtil() {

try {

Directory dir = FSDirectory.open(Paths.get(INDEX_PATH));

Analyzer analyzer = new StandardAnalyzer();

IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

writer = new IndexWriter(dir, iwc);

} catch (IOException e) {

log.error(“create luceneUtil error”);

if (null != writer) {

try {

writer.close();

} catch (IOException ioException) {

ioException.printStackTrace();

} finally {

writer = null;

}

/**

索
引单个文档
@param doc 文档信息
@throws IOException IO 异常

public void addDoc(Document doc) throws IOException {

if (null != doc) {

writer.addDocument(doc);

writer.commit();

writer.close();

}

/**

索引单个实体
@param model 单个实体
@throws IOException IO 异常

public void addModelDoc(Object model) throws IOException {

Document document = new Document();

List fields = luceneField(model.getClass());

fields.forEach(document::add);

writer.addDocument(document);

writer.commit();

writer.close();

}

/**

索引实体列表
@param objects 实例列表
@throws IOException IO 异常

public void addModelDocs(List<?> objects) throws IOException {

if (CollectionUtils.isNotEmpty(objects)) {

List docs = new ArrayList<>();

objects.forEach(o -> {

Document document = new Document();

List fields = luceneField(o);

fields.forEach(document::add);

docs.add(document);

});

writer.addDocuments(docs);

}

/**

清除所有文档
@throws IOException IO 异常

public void delAllDocs() throws IOException {

writer.deleteAll();

}

/**

索引文档列表
@param docs 文档列表
@throws IOException IO 异常

public void addDocs(List docs) throws IOException {

if (CollectionUtils.isNotEmpty(docs)) {

long startTime = System.currentTimeMillis();

writer.addDocuments(docs);

writer.commit();

log.info(“共索引{}个 Document，共耗时{} 毫秒”, docs.size(), (System.currentTimeMillis() - startTime));

} else {

log.warn(“索引列表为空”);

}

/**

根据实体 class 对象获取字段类型，进行 lucene Field 字段映射
@param modelObj 实体 modelObj 对象
@return 字段映射列表

public List luceneField(Object modelObj) {

Map<String, Object> classFields = ReflectionUtils.getClassFields(modelObj.getClass());

Map<String, Object> classFieldsValues = ReflectionUtils.getClassFieldsValues(modelObj);

List fields = new ArrayList<>();

for (String key : classFields.keySet()) {

Field field;

String dataType = StringUtils.substringAfterLast(classFields.get(key).toString(), “.”);

switch (dataType) {

case “Integer”:

field = new IntPoint(key, (Integer) classFieldsValues.get(key));

break;

case “Long”:

field = new LongPoint(key, (Long) classFieldsValues.get(key));

break;

case “Float”:

field = new FloatPoint(key, (Float) classFieldsValues.get(key));

break;

case “Double”:

field = new DoublePoint(key, (Double) classFieldsValues.get(key));

break;

case “String”:

String string = (String) classFieldsValues.get(key);

if (StringUtils.isNotBlank(string)) {

if (string.length() <= 1024) {

field = new StringField(key, (String) classFieldsValues.get(key), Field.Store.YES);

} else {

field = new TextField(key, (String) classFieldsValues.get(key), Field.Store.NO);

}

} else {

field = new StringField(key, StringUtils.EMPTY, Field.Store.NO);

}

break;

default:

field = new TextField(key, JsonUtils.obj2Json(classFieldsValues.get(key)), Field.Store.YES);

break;

}

fields.add(field);

}

return fields;

}

public void close() {

if (null != writer) {

try {

writer.close();

} catch (IOException e) {

log.error(“close writer error”);

}

writer = null;

}

public void commit() throws IOException {

if (null != writer) {

writer.commit();

writer.close();

}

有了工具类，我们再写一个 demo 来进行数据的索引

import java.util.ArrayList;

import java.util.List;

/**

Function：
Author：@author Silence
Date：2020-10-17 21:08
Desc：无

public class Demo {

public static void main(String[] args) {

LuceneIndexUtil luceneUtil = LuceneIndexUtil.getInstance();

List articles = new ArrayList<>();

try {

//索引数据

ArticleModel article1 = new ArticleModel();

article1.setTitle(“Java天下第一”);

article1.setAuthor(“粉丝”);

article1.setContent(“这是一篇给大家介绍 Lucene 的技术文章，必定点赞评论转发！！！”);

ArticleModel article2 = new ArticleModel();

article2.setTitle(“天下第一”);

article2.setAuthor(“粉丝”);

article2.setContent(“此处省略两千字…”);

ArticleModel article3 = new ArticleModel();

article3.setTitle(“Java天下第一”);

article3.setAuthor(“粉丝”);

article3.setContent(“Today is big day!”);

articles.add(article1);

articles.add(article2);

articles.add(article3);

luceneUtil.addModelDocs(articles);

luceneUtil.commit();

} catch (Exception e) {

e.printStackTrace();

}

上面的 content 内容可以自行进行替换，小编这边避免凑字数的嫌疑就不贴了。

展示

运行结束过后，我们通过 Lucene 的可视化工具 luke 来查看下索引的数据内容。下载并解压后可以看到有 .bat 和 .sh 两个脚本，根据自己的系统选择对应的脚本运行就好了。小编这边是 mac，用的是 sh 脚本，运行后打开设置的索引目录即可。

进入过后，我们可以看到下图显示的内容，选择 content 点击 show top items 可以看到右侧的索引数据，这里根据分词器的不同，索引的结果是不一样的，小编这里采用的分词器就是标准的分词器，小伙伴们可以根据自己的要求选择适合自己的分词器即可。

搜索数据

数据已经索引成功了，接下来我们就需要根据条件进行数据的搜索了，我们创建一个 LuceneSearchUtil.java 来操作数据。

import org.apache.commons.collections.MapUtils;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;

import org.apache.lucene.index.DirectoryReader;

import org.apache.lucene.queryparser.classic.QueryParser;

import org.apache.lucene.search.*;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.springframework.beans.factory.annotation.Value;

import java.io.IOException;

import java.nio.file.Paths;

import java.util.Map;

public class LuceneSearchUtil {

private static String INDEX_PATH = “/opt/lucene/demo”;

private static IndexSearcher searcher;

public static LuceneSearchUtil getInstance() {

return LuceneSearchUtil.SingletonHolder.searchUtil;

}

private static class SingletonHolder {

public final static LuceneSearchUtil searchUtil = new LuceneSearchUtil();

}

private LuceneSearchUtil() {

this.initSearcher();

}

private void initSearcher() {

Directory directory;

try {

directory = FSDirectory.open(Paths.get(INDEX_PATH));

DirectoryReader reader = DirectoryReader.open(directory);

searcher = new IndexSearcher(reader);

} catch (IOException e) {

e.printStackTrace();

}

public TopDocs searchByMap(Map<String, Object> queryMap) throws Exception {

if (null == searcher) {

this.initSearcher();

}

if (MapUtils.isNotEmpty(queryMap)) {

BooleanQuery.Builder builder = new BooleanQuery.Builder();

queryMap.forEach((key, value) -> {

if (value instanceof String) {

Query queryString = new PhraseQuery(key, (String) value);

// Query queryString = new TermQuery(new Term(key, (String) value));

builder.add(queryString, BooleanClause.Occur.MUST);

}

});

return searcher.search(builder.build(), 10);

}

return null;

}

在 Demo.java 中增加搜索代码如下：

// Query data
Map<String, Object> map = new HashMap<>();
// Short titles are indexed as one un-tokenized term, so the value must equal the indexed
// title exactly ("Java天下第一", without a space).
map.put("title", "Java天下第一");
// map.put("title", "天下第一");
// map.put("content", "最");
LuceneSearchUtil searchUtil = LuceneSearchUtil.getInstance();
TopDocs topDocs = searchUtil.searchByMap(map);
System.out.println(topDocs.totalHits);

2401_85763952

关注

18
点赞
踩
16

收藏

觉得还不错? 一键收藏
0
评论
只用了几百行代码写的百度搜索引擎，你看咋样？

进入过后，我们可以看到下图显示的内容，选择 content 点击 show top items 可以看到右侧的索引数据，这里根据分词器的不同，索引的结果是不一样的，小编这里采用的分词器就是标准的分词器，小伙伴们可以根据自己的要求选择适合自己的分词器即可。log.info(“共索引{}个 Document，共耗时{} 毫秒”, docs.size(), (System.currentTimeMillis() - startTime));// map.put(“title”, “天下第一”);
复制链接

扫一扫