springboot+solr实现搜索引擎
点关注不迷路,欢迎再来!
精简博客内容,尽量以专业术语来分享。
努力做到对每一位认可自己的读者负责。
帮助别人的同时更是丰富自己的良机。
上节我们已经将solr-7.7.2服务搭建成功,现在来实现springboot集成solr。
一.创建solr-test服务
1.pom.xml配置
<!-- Solr client API (Spring Boot starter) -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-data-solr</artifactId>
</dependency>
<!-- Document content extraction. tika-parsers (which pulls in tika-core transitively) is
     required: TXTParser and the Office/PDF parsers used by TikaUtil are not part of tika-core. -->
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parsers</artifactId>
    <version>1.9</version>
</dependency>
2.配置application.yml文件
spring:
  application:
    name: solr
  data:
    solr:
      host: http://127.0.0.1:8983/solr/mycore # "mycore" is the name of the Solr core
server:
  port: 8088 # port the Spring Boot application listens on
3.编写SolrServiceImpl实现类
/**
 * Solr-backed implementation of {@link ISolrService}: indexes, deletes and searches documents
 * through the injected {@link SolrClient}.
 *
 * <p>Every failure of the underlying client is rethrown as a {@link BussinessException} that
 * keeps the original exception as its cause.
 */
@Service("solrService")
public class SolrServiceImpl implements ISolrService {

    /** Schema field holding the user name; matches {@code @Field("user_name")} on User. */
    private static final String NAME_FIELD = "user_name";

    /** Schema field holding the user address; matches {@code @Field("user_address")} on User. */
    private static final String ADDRESS_FIELD = "user_address";

    private final Log logger = LogFactory.getLog(getClass());

    @Autowired
    private SolrClient solrClient;

    /**
     * Adds the given bean to the index and commits.
     *
     * @param solr bean to index
     * @throws BussinessException if the write fails or the Solr server cannot be reached
     */
    @Override
    public void add(User solr) throws BussinessException {
        indexBean(solr, "新增索引操作失败!");
    }

    /**
     * Searches the {@code keyvalue} field for the given keyword and binds every hit to a
     * {@link SolrModel}.
     *
     * @param valueKey keyword, appended to the query as-is (Solr query syntax is not escaped)
     * @return the matching documents
     * @throws BussinessException if the query fails or the Solr server cannot be reached
     */
    @Override
    public List<SolrModel> findAll(String valueKey) throws BussinessException {
        SolrQuery solrQuery = buildKeywordQuery(valueKey);
        try {
            QueryResponse response = solrClient.query(solrQuery);
            return solrClient.getBinder().getBeans(SolrModel.class, response.getResults());
        } catch (SolrServerException e) {
            throw new BussinessException("solr服务连接异常!", e);
        } catch (IOException e) {
            throw new BussinessException("读取索引操作失败!", e);
        }
    }

    /**
     * Deletes the document with the given id, commits, and logs the timing and status reported
     * by the delete response.
     *
     * @param id id of the document to delete
     * @throws BussinessException if the delete fails or the Solr server cannot be reached
     */
    @Override
    public void delete(String id) throws BussinessException {
        try {
            UpdateResponse updateResponse = solrClient.deleteById(id);
            solrClient.commit();
            logger.info("执行时间 elapsedTime===========" + updateResponse.getElapsedTime());
            logger.info("qTime ===========:" + updateResponse.getQTime());
            logger.info("执行状态 ===========:" + updateResponse.getStatus());
        } catch (SolrServerException e) {
            throw new BussinessException("solr服务连接异常!", e);
        } catch (IOException e) {
            throw new BussinessException("删除索引操作失败!", e);
        }
    }

    /**
     * Re-indexes the given bean and commits. Uses the same add-bean call as {@link #add(User)};
     * only the error message differs.
     *
     * @param solr bean to re-index
     * @throws BussinessException if the write fails or the Solr server cannot be reached
     */
    @Override
    public void update(User solr) throws BussinessException {
        indexBean(solr, "更新索引操作失败!");
    }

    /**
     * Not implemented yet: currently does nothing.
     *
     * @param path path of the file to index (unused)
     */
    @Override
    public void addFile(String path) throws BussinessException {
        // TODO: extract the file content and index it.
    }

    /**
     * Searches the {@code keyvalue} field for the given keyword and returns the matching users
     * with their name and address replaced by the highlighted fragment, when Solr returned one.
     *
     * @param valueKey keyword, appended to the query as-is (Solr query syntax is not escaped)
     * @return the matching users, highlighted where possible
     * @throws BussinessException if the query fails or the Solr server cannot be reached
     */
    @Override
    public List<User> findAllHighlighter(String valueKey) throws BussinessException {
        SolrQuery solrQuery = buildKeywordQuery(valueKey);
        solrQuery.setHighlight(true);
        // Highlight the fields as they are named in the schema, not the Java property names.
        solrQuery.addHighlightField(NAME_FIELD);
        solrQuery.addHighlightField(ADDRESS_FIELD);
        solrQuery.setHighlightSimplePre("<font color='red'>");
        solrQuery.setHighlightSimplePost("</font>");
        try {
            QueryResponse response = solrClient.query(solrQuery);
            List<User> users = solrClient.getBinder().getBeans(User.class, response.getResults());
            Map<String, Map<String, List<String>>> highlighting = response.getHighlighting();
            if (users == null || highlighting == null) {
                return users;
            }
            // The highlighting map is looked up by document id.
            // NOTE(review): this assumes User.getId() returns the value Solr uses as that key.
            for (User user : users) {
                String id = user.getId();
                Map<String, List<String>> fragmentsByField = (id == null) ? null : highlighting.get(id);
                if (fragmentsByField != null) {
                    applyHighlights(user, fragmentsByField);
                }
            }
            return users;
        } catch (SolrServerException e) {
            throw new BussinessException("solr服务连接异常!", e);
        } catch (IOException e) {
            throw new BussinessException("读取索引内容失败!", e);
        }
    }

    /** Builds the shared keyword query: all matches on the keyvalue field, starting at offset 0. */
    private static SolrQuery buildKeywordQuery(String valueKey) {
        SolrQuery solrQuery = new SolrQuery();
        // To require an exact phrase match instead, wrap valueKey in double quotes.
        solrQuery.setQuery("keyvalue:" + valueKey);
        // NOTE(review): asks for every match in a single response; consider paging for large indexes.
        solrQuery.setRows(Integer.MAX_VALUE);
        solrQuery.setStart(0);
        return solrQuery;
    }

    /** Adds the bean to the index and commits, reporting I/O failures with the given message. */
    private void indexBean(User bean, String ioFailureMessage) throws BussinessException {
        try {
            solrClient.addBean(bean);
            solrClient.commit();
        } catch (IOException e) {
            throw new BussinessException(ioFailureMessage, e);
        } catch (SolrServerException e) {
            throw new BussinessException("solr服务连接异常!", e);
        }
    }

    /** Copies the highlighted fragments of one document onto the matching User properties. */
    private void applyHighlights(User user, Map<String, List<String>> fragmentsByField) {
        for (Map.Entry<String, List<String>> entry : fragmentsByField.entrySet()) {
            List<String> fragments = entry.getValue();
            if (fragments == null || fragments.isEmpty()) {
                continue;
            }
            // A field may yield several fragments; the last one wins.
            String fragment = fragments.get(fragments.size() - 1);
            if (ADDRESS_FIELD.equals(entry.getKey())) {
                user.setAddress(fragment);
            } else if (NAME_FIELD.equals(entry.getKey())) {
                user.setName(fragment);
            }
        }
    }
}
4.自定义转换工具类
/**
 * Conversion helpers: file-extension extraction and timestamp formatting.
 */
public class ConvertUtil {

    /** Pattern shared by both {@code formatDate} overloads. */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss.SSS";

    /**
     * Returns the lower-cased extension of a file name, i.e. the text after the last dot.
     *
     * @param fileName file name to inspect; may be {@code null}
     * @return the extension without the dot, or an empty string when {@code fileName} is
     *         {@code null}, contains no dot, or ends with a dot
     */
    public static String getFileSufix(String fileName) {
        if (fileName == null) {
            return "";
        }
        int splitIndex = fileName.lastIndexOf('.');
        if (splitIndex < 0) {
            // No dot means no extension; do not return the whole name as if it were one.
            return "";
        }
        // Locale.ROOT keeps the result stable regardless of the JVM's default locale.
        return fileName.substring(splitIndex + 1).toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Formats the current time as {@code yyyy-MM-dd HH:mm:ss.SSS}.
     *
     * @return the formatted current time
     */
    public static String formatDate() {
        return formatDate(new Date());
    }

    /**
     * Formats the given date as {@code yyyy-MM-dd HH:mm:ss.SSS}.
     *
     * @param dat date to format
     * @return the formatted date
     */
    public static String formatDate(Date dat) {
        // A new formatter per call: SimpleDateFormat instances must not be shared across threads.
        return new SimpleDateFormat(TIMESTAMP_PATTERN).format(dat);
    }
}
提取文档内容工具类
/**
 * Helpers that extract the plain-text content of a document with Apache Tika.
 *
 * <p>Both methods are best-effort: any failure is logged and an empty string is returned.
 * Neither method closes the stream it is given.
 */
public class TikaUtil {

    /**
     * Extracts the content of a plain-text document.
     *
     * @param is stream positioned at the start of the document
     * @return the extracted text, or an empty string if extraction fails
     */
    public static String txt2String(InputStream is) {
        try {
            // -1 disables the handler's write limit, so a long document is returned in full
            // instead of failing the parse and being reported as empty.
            BodyContentHandler handler = new BodyContentHandler(-1);
            Metadata metadata = new Metadata();
            ParseContext context = new ParseContext();
            TXTParser txtParser = new TXTParser();
            txtParser.parse(is, handler, metadata, context);
            return handler.toString();
        } catch (Exception e) {
            java.util.logging.Logger.getLogger(TikaUtil.class.getName())
                    .log(java.util.logging.Level.WARNING, "Failed to extract text content", e);
            return "";
        }
    }

    /**
     * Extracts the content of a Word, Excel, PowerPoint or PDF document, letting Tika detect
     * the actual format.
     *
     * @param is stream positioned at the start of the document
     * @return the extracted text, or an empty string if extraction fails
     */
    public static String doc2String(InputStream is) {
        try {
            // -1 disables the handler's write limit (see txt2String).
            BodyContentHandler handler = new BodyContentHandler(-1);
            Metadata metadata = new Metadata();
            AutoDetectParser parser = new AutoDetectParser();
            parser.parse(is, handler, metadata);
            return handler.toString();
        } catch (Exception e) {
            java.util.logging.Logger.getLogger(TikaUtil.class.getName())
                    .log(java.util.logging.Level.WARNING, "Failed to extract document content", e);
            return "";
        }
    }
}
public class User implements Serializable {
//必须实现可序列化接口,要在网络上传输
@Field("user_id")//使用这个注释,里面的名字是根据你在solr数据库中配置的来决定
private String id;
@Field("user_name")
private String name;
@Field("user_sex")
private String sex;
@Field("user_address")
private String address;
setter/getter
}
5.配置field
Solr field域的配置极为重要,field的配置会影响到索引的创建和查询出的结果展示。
<!-- Fields for the indexed test documents: title, content, file type and upload time.
     uploadtime is declared indexed="false" stored="true". -->
<field name="title" type="text_ik" indexed="true" stored="true" />
<field name="content" type="text_ik" indexed="true" stored="true" />
<field name="filetype" type="string" indexed="true" stored="true" />
<field name="uploadtime" type="string" indexed="false" stored="true" />
<!-- Fields for User beans; the names match the @Field annotations on the User class.
     NOTE(review): user_id and user_sex use the analyzed text_ik type; exact-match values
     such as ids usually want type="string" - confirm before relying on id lookups. -->
<field name="user_id" type="text_ik" indexed="true" stored="true" />
<field name="user_name" type="text_ik" indexed="true" stored="true" />
<field name="user_sex" type="text_ik" indexed="true" stored="true" />
<field name="user_address" type="text_ik" indexed="true" stored="true" />
name:查询时的名称
type:这个是之前定义的FieldType的名称,在这使用的ik分词
indexed:是否索引(true/false)
stored:是否存储(是否保存字段的原始值,以便在查询结果中返回)
multiValued:是否多值(一般配合copyField使用)
6.导入数据并建立索引
步骤1:在本地准备将要建立索引的文件
准备两个docx文档,写入一些自定义内容,放到指定目录下,当然也不一定非得是docx。以下是我准备的两个文件:
运行solr-test项目导入docx文件,访问:http://127.0.0.1:8088/addfile
导入user数据,访问:http://127.0.0.1:8088/addUser
二.多字段匹配方法
添加一个field字段,自定义为keyvalue,这是数据库中没有的字段。注意multiValued="true",这里必须设置为true。
<!-- Catch-all keyword field that aggregates every field that should be searchable together.
     It is multiValued because several source fields are copied into it. -->
<field name="keyvalue" type="text_ik" indexed="true" stored="true" multiValued="true"/>
<!-- Copy each searchable field into the catch-all field.
     NOTE(review): maxChars="256" presumably caps how much of each source value is copied;
     confirm that is enough for "content" if full-document search is expected. -->
<copyField source="filetype" dest="keyvalue" maxChars="256"/>
<copyField source="title" dest="keyvalue" maxChars="256"/>
<copyField source="content" dest="keyvalue" maxChars="256"/>
<copyField source="user_sex" dest="keyvalue" maxChars="256"/>
<copyField source="user_name" dest="keyvalue" maxChars="256"/>
<copyField source="user_address" dest="keyvalue" maxChars="256"/>
注意此刻的keyvalue查询结果
现在我们来查询关键字“报告”,注意结果:
现在我们通过自定义field为keyvalue,成功的解决了多字段匹配方法,可实现全文关键字检索。