Elasticsearch-5.6.3简单使用教程
Elasticsearch初学者,不喜勿喷
本地服务搭建配置
项目中Elasticsearch搭建配置及使用(基于SpringBoot)
环境配置
- jdk:1.8
- spring boot:2.0.0.RELEASE
- 依赖引入
<!--ES相关依赖 start--> <dependency> <groupId>org.springframework.data</groupId> <artifactId>spring-data-elasticsearch</artifactId> <version>3.0.6.RELEASE</version> </dependency> <!--强制声明ES版本--> <dependency> <groupId>org.elasticsearch</groupId> <artifactId>elasticsearch</artifactId> <version>5.6.3</version> </dependency> <!--因为要采用java编码直接操作ES所以需要引入--> <dependency> <groupId>org.elasticsearch.client</groupId> <artifactId>transport</artifactId> <version>5.6.3</version> <exclusions> <exclusion> <groupId>commons-logging</groupId> <artifactId>commons-logging</artifactId> </exclusion> </exclusions> </dependency> <!--ES相关依赖 end-->
参数配置
spring.data.elasticsearch.cluster-name=zdmj-application
spring.data.elasticsearch.cluster-nodes=服务器地址:9300
spring.data.elasticsearch.repositories.enabled=false
加载Elasticsearch配置信息
<!-- 继承ElasticsearchRepository的接口层,详情见数据同步的继承ElasticsearchRepository -->
@EnableElasticsearchRepositories(basePackages="*.*.dao.cache")
数据同步
建模
- 集成ik分词器教程
- Demo
/**
 * Elasticsearch mapping entity for articles.
 * Stored in index "article", type "articleTemplate"; 3 shards, 0 replicas.
 */
@Document(indexName = "article", type = "articleTemplate", replicas = 0, shards = 3)
@Data
public class ArticleSearchTemplatePO {
@Id // primary key, unique document id
private Integer articleId;
// keyword field: indexed as-is, NOT analyzed/tokenized
@Field(type = FieldType.keyword)
private String coverUrl;
@Field(searchAnalyzer = "ik_max_word", analyzer = "ik_max_word", type = FieldType.text) // full-text field analyzed with the ik_max_word tokenizer at both index and search time
private String title;
// numeric field used as an exact-match search condition (term query)
@Field(type = FieldType.Integer)
// default value so new documents start with status = 0
private Integer status = 0;
}
//IK 包含两类分词器:
//ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合;
//ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”。
继承ElasticsearchRepository
/**
 * Spring Data repository over ArticleSearchTemplatePO documents.
 * The second type parameter (Integer) must match the type of the entity's
 * {@code @Id} field (articleId).
 * NOTE(review): "Artciles"/"Respository" are misspelled, but the name is kept
 * as-is because callers reference it.
 */
public interface IArtcilesSearchRespository extends ElasticsearchRepository<ArticleSearchTemplatePO, Integer> {
}
// Integer matches the @Id primary-key type of ArticleSearchTemplatePO
同步到ES
@Autowired
private IArtcilesSearchRespository artcilesSearchRespository;

/**
 * Copies every article row from the relational database into Elasticsearch.
 * Reads in pages of 10 000 rows (via PageHelper) to bound memory usage,
 * then bulk-saves each page through the repository.
 */
@Override
public void copyAllArticlesToES() {
    Integer totalCount = iAppHomeSearch.selectArticlesCount();
    if (totalCount == null || totalCount <= 0) {
        return; // nothing to copy
    }
    // page size caps memory usage for large tables
    int size = 10000;
    // ceiling division: the original `totalCount / size + 1` ran one useless
    // extra (empty) iteration whenever totalCount was an exact multiple of size
    int total = (totalCount + size - 1) / size;
    for (int i = 1; i <= total; ++i) {
        // PageHelper intercepts the NEXT MyBatis query and applies LIMIT/OFFSET
        PageHelper.startPage(i, size);
        // fetch one page of rows from the database
        List<ArticleSearchTemplatePO> article = iAppHomeSearch.getAllArticle();
        PageInfo<ArticleSearchTemplatePO> articleSearch = new PageInfo<>(article);
        List<ArticleSearchTemplatePO> articleList = articleSearch.getList();
        // bulk index this page into Elasticsearch (upserts by @Id)
        artcilesSearchRespository.saveAll(articleList);
    }
}
Elasticsearch查询
查询
// pagination — NOTE(review): PageRequest.of is zero-based, so page = 0 is the first page
Pageable pageable = PageRequest.of(page, pageSize);
// search conditions combined in a single bool query
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
// optional clause: full-text (analyzed) match against the title
boolQuery.should(QueryBuilders.matchQuery("title", searchValue));
// required clause: exact (non-analyzed) term match on status
boolQuery.must(QueryBuilders.termQuery("status", 1));
SearchQuery searchQuery = new NativeSearchQueryBuilder()
// query
.withQuery(boolQuery)
// pagination
.withPageable(pageable)
.build();
List<ArticleSearchTemplatePO> article= artcilesSearchRespository.search(searchQuery).getContent();
排序
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
SearchQuery searchQuery = new NativeSearchQueryBuilder()
// sort: SortOrder.DESC = descending, SortOrder.ASC = ascending
.withSort(SortBuilders.fieldSort("readNum").order(SortOrder.DESC))
// query
.withQuery(boolQuery)
// pagination (pageable built as in the basic query example)
.withPageable(pageable)
.build();
高亮
/**
 * Highlight markup setting.
 * PRETAG is inserted immediately before each highlighted fragment.
 */
private final String PRETAG = "<";
/**
 * Highlight markup setting.
 * POSTTAG is inserted immediately after each highlighted fragment.
 */
private final String POSTTAG = ">";
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
SearchQuery searchQuery = new NativeSearchQueryBuilder()
// query
.withQuery(boolQuery)
// pagination
.withPageable(pageable)
// highlighting; to highlight all fields, replace "title" with "*"
.withHighlightFields(new HighlightBuilder.Field("title").postTags(POSTTAG).preTags(PRETAG))
.build();
// map each hit back onto the entity, substituting the highlighted fragment for the raw title
Page<ArticleSearchTemplatePO> search = elasticsearchTemplate.queryForPage(searchQuery, ArticleSearchTemplatePO.class, new SearchResultMapper() {
@Override
public <T> AggregatedPage<T> mapResults(SearchResponse response, Class<T> clazz, Pageable pageable) {
ArrayList<ArticleSearchTemplatePO> poems = new ArrayList<>();
SearchHit[] searchHits = response.getHits().getHits();
for (SearchHit searchHit : searchHits) {
Map<String, Object> map = searchHit.getSource();
ArticleSearchTemplatePO poem = new ArticleSearchTemplatePO();
try {
poem = (ArticleSearchTemplatePO) MapToObjectUtil.mapToObject(map, ArticleSearchTemplatePO.class);
} catch (Exception e) {
e.printStackTrace();
}
// for more highlighted fields, repeat the 4 lines below with
// searchHit.getHighlightFields().get("<field>") and set the value back
HighlightField title = searchHit.getHighlightFields().get("title");
if (title != null) {
poem.setTitle(title.fragments()[0].toString());
}
poems.add(poem);
}
// Always return a page, even when empty. The original returned null here
// (and had a dead length check inside the loop), which made
// search.getContent() below throw a NullPointerException on empty results.
return new AggregatedPageImpl<>((List<T>) poems);
}
});
List<ArticleSearchTemplatePO> list = search.getContent();
- 范围区间查询
boolQuery.must(QueryBuilders.rangeQuery("maxSalary").gte(salaryScope.get("minSalary")).lte(salaryScope.get("maxSalary")));
- 过滤空字符串
boolQuery.must(QueryBuilders.existsQuery("comCode"));
-
修改
- 从数据库中根据主键 @Id (详情见建模)重新读取一份
// Re-read the row(s) for this @Id from the database and re-save them to ES.
List<ArticleSearchTemplatePO> list = iAppHomeSearch.getAllArticle(articleId);
if (list.size() > 0) {
    // saveAll upserts by @Id; for a single row, save(list.get(0)) works too.
    // (The original used an HTML comment `<!-- ... -->`, which is not valid Java.)
    artcilesSearchRespository.saveAll(list);
}
- 根据主键 @Id 从Elasticsearch服务器中读取,通过SET方法替换需要修改的数据
// Look the document up in Elasticsearch by its @Id value.
BoolQueryBuilder idQuery = QueryBuilders.boolQuery();
idQuery.should(QueryBuilders.termQuery("articleId", articleId));
SearchQuery searchQuery = new NativeSearchQueryBuilder()
        .withQuery(idQuery)
        .build();
List<ArticleSearchTemplatePO> hits = artcilesSearchRespository.search(searchQuery).getContent();
// Mutate the field(s) to change via setters and save back; save() overwrites by @Id.
if (!hits.isEmpty()) {
    ArticleSearchTemplatePO doc = hits.get(0);
    doc.setTitle("测试");
    artcilesSearchRespository.save(doc);
}
-
删除,根据主键 @Id 删除
artcilesSearchRespository.deleteById(articleId);
ik分词器
ik分词
- ik分词器下载 ,下载与本地elasticsearch版本一致的ik分词器
- elasticsearch-analysis-ik-5.6.3.zip解压到elasticsearch-5.6.3/plugins/ik目录下
- 重启ES
- 测试
POST /my_index/my_type/_bulk { "index": { "_id": "1"} } { "text": "你修复改下状态" } GET /my_index/_analyze { "text": "你修复改下状态", "analyzer": "ik_max_word" } <!--返回结果--> { "token": "你", "start_offset": 0, "end_offset": 1, "type": "CN_CHAR", "position": 0 }, { "token": "修复", "start_offset": 1, "end_offset": 3, "type": "CN_WORD", "position": 1 }, { "token": "改下", "start_offset": 3, "end_offset": 5, "type": "CN_WORD", "position": 2 }, { "token": "状态", "start_offset": 5, "end_offset": 7, "type": "CN_WORD", "position": 3 }
ik配置文件
- ik配置文件地址:es/plugins/ik/config目录
- 词库
- IKAnalyzer.cfg.xml:用来配置自定义词库
- main.dic:ik原生内置的中文词库,总共有27万多条,只要是这些单词,都会被分在一起
- quantifier.dic:放了一些单位相关的词
- suffix.dic:放了一些后缀
- surname.dic:中国的姓氏
- stopword.dic:英文停用词
- ik原生最重要的两个配置文件
- main.dic:包含了原生的中文词语,会按照这个里面的词语去分词
- stopword.dic:包含了英文的停用词
自定义词库
- 网络教程
- 进入es\plugins\ik\config文件夹
- 创建词库 .dic 后缀,字符格式 UTF-8 的文件
蓝瘦 香菇
- 打开IKAnalyzer.cfg.xml,加载新的词库
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!--用户可以在这里配置自己的扩展字典 -->
<!-- 多个词库用 ; 隔开,本地我创建的词库在custom文件夹下 -->
<entry key="ext_dict">custom/test.dic</entry>
<!--用户可以在这里配置自己的扩展停止词字典-->
<entry key="ext_stopwords"></entry>
<!--用户可以在这里配置远程扩展字典 -->
<!-- <entry key="remote_ext_dict">words_location</entry> -->
<!--用户可以在这里配置远程扩展停止词字典-->
<!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>
- 重启ES
- 测试
GET /my_index/_analyze
{
"text": "蓝瘦香菇",
"analyzer": "ik_max_word"
}
原词库
{
"token": "蓝",
"start_offset": 0,
"end_offset": 1,
"type": "CN_CHAR",
"position": 0
},
{
"token": "瘦",
"start_offset": 1,
"end_offset": 2,
"type": "CN_CHAR",
"position": 1
},
{
"token": "香菇",
"start_offset": 2,
"end_offset": 4,
"type": "CN_WORD",
"position": 2
}
自定义词库后的结果
{
"token": "蓝瘦",
"start_offset": 0,
"end_offset": 2,
"type": "CN_WORD",
"position": 0
},
{
"token": "香菇",
"start_offset": 2,
"end_offset": 4,
"type": "CN_WORD",
"position": 1
}
同义词
-
创建同义词库
es\config\analysis 创建UTF-8格式的txt文件 -
输入同义词,同一组内的词用英文逗号 , 隔开;多个同义词组分行存放
社保,公积金 西红柿,番茄
-
重启ES
-
加载同义词
PUT /test { "mappings": { "employee": { "properties": { "name":{ "type": "text", "analyzer": "ik-index", "search_analyzer": "ik-smart" } } } }, "settings": { "analysis": { "filter": { "local_synonym" : { "type" : "synonym", "synonyms_path" : "analysis/synonyms.txt" } }, "analyzer": { "ik-index": { "type": "custom", "tokenizer": "ik_max_word", "filter": [ "local_synonym" ] }, "ik-smart": { "type": "custom", "tokenizer": "ik_smart", "filter": [ "local_synonym" ] } } } } }
-
测试
PUT /test/employee/1 { "name" : "社保" } PUT /test/employee/2 { "name" : "公积金" } GET /test/employee/_search { "query":{ "match": { "name": "公积金" } } } 返回结果 "hits": { "total": 2, "max_score": 0.46029136, "hits": [ { "_index": "test", "_type": "employee", "_id": "2", "_score": 0.46029136, "_source": { "name": "公积金" } }, { "_index": "test", "_type": "employee", "_id": "1", "_score": 0.46029136, "_source": { "name": "社保" } } ] }