elasticsearch笔记

wang_zhij

已于 2023-05-27 15:51:42 修改

阅读量273

点赞数

文章标签： java servlet html elasticsearch

于 2023-05-27 15:44:08 首次发布

本文链接：https://blog.csdn.net/wang_zhij/article/details/130902167

版权

ES笔记

#搜索后面都加上 /_search

#---单字段查询----

# query-match/match_all/match_phrase/term-

# aggs--{"ageAgg"}--terms/aggs--field/size--

# sort-[{"account_number": {"order": "desc"}}]

# from：0

# size: 2

# _source:["balance","firstname"]

#-----多字段匹配-----

# query--multi_match--query/fields:["state","address"]

#----复合查询----

# query--bool--must/must_not/should/filter--match/term--

# query--bool--must/must_not/should/filter--match/term/range--

# query--bool--filter-range--gte/lte--

#-----java调用实现------------

#bulkRequest.add(indexRequest)

# SearchRequest--SearchSourceBuilder--QueryBuilders.matchQuery--

# SearchRequest--SearchSourceBuilder--AggregationBuilders.terms--

###来源 https://blog.csdn.net/hancoder/article/details/107612746

#http://192.168.56.10:9200/customer/external/_bulk

#批量新增

POST /customer/external/_bulk

{"index":{"_id":"1"}}

{"name":"张丹"}

{"index":{"_id":"2"}}

{"name":"张丹2"}

{"index":{"_id":"3"}}

{"name":"张丹3"}

## 批量执行——删除-新增-更新

POST /_bulk

{"delete":{"_index":"website","_type":"blog","_id":"123"}}

{"create":{"_index":"website","_type":"blog","_id":"123"}}

{"title":"my frist blog post"}

{"index":{"_index":"website","_type":"blog"}}

{"title":"my 2 logs post"}

{"update":{"_index":"website","_type":"blog","_id":"123"}}

{"doc":{"title":"my updated blog post"}}

#批量保存样例数据--练习使用

#https://github.com/elastic/elasticsearch/blob/master/docs/src/test/resources/accounts.json ，导入测试数据，

POST /bank/account/_bulk

#搜索都加上_search

GET /bank/_search?q=*&sort=account_number:asc

##The response also provides the following information about the search request:

#•took – how long it took Elasticsearch to run the query, in milliseconds

#•timed_out – whether or not the search request timed out

#•_shards – how many shards were searched and a breakdown of how many shards succeeded, failed, or were skipped.

#•max_score – the score of the most relevant document found

#•hits.total.value - how many matching documents were found

#•hits.sort - the document’s sort position (when not sorting by relevance score)

#•hits._score - the document’s relevance score (not applicable when using match_all)

##查询数据排序--match_all-查询所有

GET /bank/_search

{

"query": { "match_all": {} },

"sort": [

{ "account_number": "asc" },

{"balance":"desc"}

]

}

# 分页查询

GET bank/_search

{

"query": {

"match_all": {}

},

"from": 0,

"size": 2,

"sort": [

{

"account_number": {

"order": "desc"

}

}

]

}

#返回部分字段

GET bank/_search

{

"query": {

"match_all": {}

"from": 0,

"size": 2,

"sort": [

{

"account_number": {

"order": "desc"

}

"_source": ["balance","firstname"]

}

#基本类型（非字符串），精确控制--有得分max_score

GET bank/_search

{

"query": {

"match": {

"account_number": "20"

}

#字符串，全文检索--包含就行，最终会按照评分进行排序，会对检索条件进行分词匹配。

GET bank/_search

{

"query": {

"match": {

"address": "kings"

}

#match_phrase [短语匹配] 不可分割--查询条件仍会被分词，但要求各个词按相同顺序、相邻地出现在字段中，即将需要匹配的值当成一个完整短语进行检索

GET bank/_search

{

"query": {

"match_phrase": {

"address": "mill road"

}

#match_phrase和Match的区别，观察如下实例

GET bank/_search

{

"query": {

"match_phrase": {

"address": "990 Mill"

}

#使用match的keyword---不分割-查询结果，一条也未匹配到

GET bank/_search

{

"query": {

"match": {

"address.keyword": "990 Mill"

}

#使用match的keyword----修改匹配条件为“990 Mill Road” ---查询出一条数据

GET bank/_search

{

"query": {

"match": {

"address.keyword": "990 Mill Road"

}

#文本字段的匹配，使用keyword，匹配的条件就是要显示字段的全部值，要进行精确匹配的。

#match_phrase是做短语匹配，只要文本中包含匹配条件，就能匹配到。

#multi_match【多字段匹配】--会进行分词

# --state或者address中包含mill，并且在查询过程中，会对于查询条件进行分词。

GET bank/_search

{

"query": {

"multi_match": {

"query": "mill",

"fields": [

"state",

"address"

]

}

#bool用来做复合查询--复合语句可以合并任何其他查询语句，包括复合语句。这也就意味着，复合语句之间--可以互相嵌套，可以表达非常复杂的逻辑。

#must：必须达到must所列举的所有条件

GET bank/_search

{

"query":{

"bool":{

"must":[

{"match":{"address":"mill"}},

{"match":{"gender":"M"}}

]

}

#must_not，必须不匹配must_not所列举的所有条件。

#should，应该满足should所列举的条件。

#实例：查询gender=m，并且address=mill的数据

GET bank/_search

{

"query": {

"bool": {

"must": [

{

"match": {

"gender": "M"

}

{

"match": {

"address": "mill"

}

]

}

#must_not：必须不是指定的情况

#实例：查询gender=m，并且address=mill的数据，但是age不等于38的

GET bank/_search

{

"query": {

"bool": {

"must": [

{

"match": {

"gender": "M"

}

{

"match": {

"address": "mill"

}

{

"term": {

"age": 28

}

"must_not": [

{

"match": {

"age": "38"

}

]

}

#should：应该达到should列举的条件，如果达到会增加相关文档的评分，并不会改变查询的结果。如果query中只有should且只有一种匹配规则，那么should的条件就会被作为默认匹配条件而去改变查询结果。

#实例：匹配lastName应该等于Wallace的数据

#能够看到相关度越高，得分也越高

GET bank/_search

{

"query": {

"bool": {

"must": [

{

"match": {

"gender": "M"

}

{

"match": {

"address": "mill"

}

"must_not": [

{

"match": {

"age": "18"

}

"should": [

{

"match": {

"lastname": "Wallace"

}

]

}

### （7）Filter【结果过滤】

#并不是所有的查询都需要产生分数，特别是那些仅用于filtering过滤的文档。为了不计算分数，elasticsearch会自动检查场景并且优化查询的执行。

#filter在使用过程中，并不会计算相关性得分

#这里先是查询所有匹配address=mill的文档，然后再根据10000<=balance<=20000进行过滤查询结果

GET bank/_search

{

"query": {

"bool": {

"must": [

{

"match": {

"address": "mill"

}

"filter": {

"range": {

"balance": {

"gte": "10000",

"lte": "20000"

}

#在boolean查询中，must, should 和must_not 元素都被称为查询子句。文档是否符合每个“must”或“should”子句中的标准，决定了文档的“相关性得分”。得分越高，文档越符合您的搜索条件。默认情况下，Elasticsearch返回根据这些相关性得分排序的文档。

#The criteria in a must_not clause is treated as a filter. It affects whether or not the document is included in the results, but does not contribute to how documents are scored. You can also explicitly specify arbitrary filters to include or exclude documents based on structured data.

#“must_not”子句中的条件被视为“过滤器”。它影响文档是否包含在结果中，但不影响文档的评分方式。还可以显式地指定任意过滤器来包含或排除基于结构化数据的文档。

### （8）term---精确使用

# 和match一样。匹配某个属性的值。全文检索字段用match，其他非text字段匹配用term。

#Avoid using the term query for text fields.

#避免对文本字段使用“term”查询

#By default, Elasticsearch changes the values of text fields as part of analysis. This can make finding exact matches for text field values difficult.

#默认情况下，Elasticsearch作为analysis的一部分更改“text”字段的值。这使得为“text”字段值寻找精确匹配变得困难。

#To search text field values, use the match.

#要搜索“text”字段值，请使用匹配。

# https://www.elastic.co/guide/en/elasticsearch/reference/7.6/query-dsl-term-query.html

GET bank/_search

{

"query": {

"term": {

"address": "mill Road"

}

#一条也没有匹配到

# 而更换为match匹配时，能够匹配到32个文档

#也就是说，全文检索字段用match，其他非text字段匹配用term

#包含就行

GET bank/_search

{

"query": {

"match_phrase": {

"address": "mill Road"

}

#整段匹配-区分大小写

GET bank/_search

{

"query": {

"match": {

"address.keyword": "990 Mill Road"

}

##精确查询

GET bank/_search

{

"query": {

"term": {

"balance": 25571

}

### （9）Aggregation（执行聚合）

#聚合提供了从数据中分组和提取数据的能力。最简单的聚合方法大致等于SQL Group by和SQL聚合函数。在elasticsearch中，执行搜索可以返回hits（命中结果），并且同时在同一个响应中返回与hits（命中结果）分隔开的聚合结果。这是非常强大且有效的，你可以执行查询和多个聚合，并且在一次使用中得到各自的（任何一个的）返回结果，使用一次简洁和简化的API来避免网络往返

#size:0不显示搜索数据

#aggs：执行聚合。聚合语法如下：

#"aggs":{

# "aggs_name这次聚合的名字，方便展示在结果集中":{

# "AGG_TYPE聚合的类型(avg,term,terms)":{}

# }

#搜索address中包含mill的所有人的年龄分布以及平均年龄，但不显示这些人的详情

GET bank/_search

{

"query": {

"match": {

"address": "Mill"

}

"aggs": {

"ageAgg": {

"terms": {

"field": "age",

"size": 10

}

"ageAvg": {

"avg": {

"field": "age"

}

"balanceAvg": {

"avg": {

"field": "balance"

}

"size": 0

}

#复杂：按照年龄聚合，并且求这些年龄段的这些人的平均薪资--嵌套子聚合

GET bank/_search

{

"query": {

"match_all": {}

"aggs": {

"ageAgg": {

"terms": {

"field": "age",

"size": 100

"aggs": {

"ageAvg": {

"avg": {

"field": "balance"

}

"size": 0

}

# 查出所有年龄分布，并且这些年龄段中M的平均薪资和F的平均薪资以及这个年龄段的总体平均薪资

GET bank/_search

{

"query": {

"match_all": {}

"aggs": {

"ageAgg": {

"terms": {

"field": "age",

"size": 100

"aggs": {

"genderAgg": {

"terms": {

"field": "gender.keyword"

"aggs": {

"balanceAvg": {

"avg": {

"field": "balance"

}

"ageBalanceAvg": {

"avg": {

"field": "balance"

}

"size": 0

}

### 查看mapping信息

#Mapping是用来定义一个文档（document），以及它所包含的属性（field）是如何存储和索引的。比如：使用mapping来定义每个字段的类型（如text、keyword、long、integer）以及该字段是否被索引。

GET bank/_mapping

##创建新索引--用于数据迁移

PUT /newbank

{

"mappings" : {

"properties" : {

"account_number" : {

"type" : "long"

"address" : {

"type" : "text"

"age" : {

"type" : "integer"

"balance" : {

"type" : "long"

"city" : {

"type" : "keyword"

"email" : {

"type" : "keyword"

"employer" : {

"type" : "keyword"

"firstname" : {

"type" : "text"

"gender" : {

"type" : "keyword"

"lastname" : {

"type" : "text",

"fields" : {

"keyword" : {

"type" : "keyword",

"ignore_above" : 256

}

"state" : {

"type" : "keyword"

}

#查看新的映射

GET /newbank/_mapping

# 旧数据是存在类型type 的

GET /bank/_search

#迁移数据

POST _reindex

{

"source":{

"index":"bank",

"type":"account"

},

"dest":{

"index":"newbank"

}

}

#查看新映射数据--1000条--数据迁移成功

GET /newbank/_search

#查看所有索引

GET _cat/indices

#查看es节点--

GET _cat/nodes

#查看主节点

GET _cat/master

#创建映射创建索引并指定映射

PUT /my_index

{

"mappings": {

"properties": {

"age": {

"type": "integer"

"email": {

"type": "keyword"

"name": {

"type": "text"

}

#创建映射

PUT questions

{

"mappings": {

"properties": {

"id": {

"type": "long",

"index": false,

"doc_values": false

"PARENT_ID": {

"type": "long",

"index": false,

"doc_values": false

"LABELCODE": {

"type": "keyword"

"CONTENT": {

"type": "text",

"analyzer": "ik_smart"

"SOURCE_ORG_ID": {

"type": "long"

"SUB_COUNT": {

"type": "integer"

"STATUS": {

"type": "integer"

"option": {

"type": "text",

"analyzer": "ik_smart"

"right": {

"type": "keyword"

"rigth_code": {

"type": "integer"

}

#查看映射

GET /my_index

#添加新的字段映射

#这里的 “index”: false，表明新增的字段不能被检索，只是一个冗余字段。

PUT /my_index/_mapping

{

"properties": {

"employee-id": {

"type": "keyword",

"index": false

}

##可以添加字段，但是不能更新字段类型

# 更新映射

#对于已经存在的字段映射，我们不能更新。更新必须创建新的索引，进行数据迁移。

# 数据迁移

#先创建new_twitter的正确映射。然后使用如下方式进行数据迁移--6.0之后不存在type的迁移

POST _reindex [固定写法]

{

"source":{

"index":"twitter"

},

"dest":{

"index":"new_twitters"

}

}

#将旧索引的type下的数据进行迁移--6.0之前的存在type的迁移

POST _reindex [固定写法]

{

"source":{

"index":"twitter",

"type":"twitter"

},

"dest":{

"index":"new_twitters"

}

}

GET /bank/_search

{"query":{"match_phrase":{

"address":"990 Concord"

}}}

GET /newbank/_search

{

"query":{

"bool": {

"must": {

"match": {

"address": "Street"

}

##聚合分析-统计查询

GET /newbank/_search

{"query": {

"bool": {

"must": [

{

"match": {

"address": "671 Street"

}

{

"term": {

"age": 36

}

"should": [

{

"match": {

"lastname": "Wallace"

}

]

}

"aggs": {

"sexAggs": {

"terms": {

"field": "gender",

"size": 10

"aggs": {

"balanceAvg": {

"avg": {

"field": "balance"

}

"cityAggs":{

"terms": {

"field": "state",

"size": 10

}

"size": 3

}

##分词器--标准分词器

POST _analyze

{

"analyzer": "standard",

"text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."

}

POST _analyze

{

"analyzer": "ik_smart",

"text": "北京市昌平区北七家镇"

}

##使用中文分词器-ik分词器-最大单词

GET _analyze

{

"analyzer": "ik_max_word",

"text":"我是中国人尚硅谷吃饭就去呵呵和合谷"

}

##自定义ik分词-http://192.168.56.10/es/fenci.txt

GET _analyze

{

"analyzer": "ik_smart",

"text":"我是中国人尚硅谷吃饭就去呵呵和合谷及藕粉色老疙岛常峪村村"

}

###------java调用--------------------------start--------------------------------

#<dependency>

# <groupId>org.elasticsearch.client</groupId>

# <artifactId>elasticsearch-rest-high-level-client</artifactId>

# <version>7.6.2</version>

#</dependency>

GET bank/_search

{

"query": {

"match": {

"address": "Mill"

}

"aggs": {

"ageAgg": {

"terms": {

"field": "age",

"size": 10

}

"ageAvg": {

"avg": {

"field": "age"

}

"balanceAvg": {

"avg": {

"field": "balance"

}

#/**

# * 复杂检索:在bank中搜索address中包含mill的所有人的年龄分布以及平均年龄，平均薪资

#* @throws IOException

#*/

# @Test

# public void searchData() throws IOException {

# //1. 创建检索请求

# SearchRequest searchRequest = new SearchRequest();

# //1.1）指定索引

# searchRequest.indices("bank");

# //1.2）构造检索条件

# SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

# sourceBuilder.query(QueryBuilders.matchQuery("address","Mill"));

# //1.2.1)按照年龄分布进行聚合

# TermsAggregationBuilder ageAgg=AggregationBuilders.terms("ageAgg").field("age").size(10);

# sourceBuilder.aggregation(ageAgg);

# //1.2.2)计算平均年龄

# AvgAggregationBuilder ageAvg = AggregationBuilders.avg("ageAvg").field("age");

# sourceBuilder.aggregation(ageAvg);

# //1.2.3)计算平均薪资

# AvgAggregationBuilder balanceAvg = AggregationBuilders.avg("balanceAvg").field("balance");

# sourceBuilder.aggregation(balanceAvg);

# System.out.println("检索条件："+sourceBuilder);

# searchRequest.source(sourceBuilder);

# //2. 执行检索

# SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

# System.out.println("检索结果："+searchResponse);

# //3. 将检索结果封装为Bean

# SearchHits hits = searchResponse.getHits();

# SearchHit[] searchHits = hits.getHits();

# for (SearchHit searchHit : searchHits) {

# String sourceAsString = searchHit.getSourceAsString();

# Account account = JSON.parseObject(sourceAsString, Account.class);

# System.out.println(account);

# }

# //4. 获取聚合信息

# Aggregations aggregations = searchResponse.getAggregations();

# Terms ageAgg1 = aggregations.get("ageAgg");

# for (Terms.Bucket bucket : ageAgg1.getBuckets()) {

# String keyAsString = bucket.getKeyAsString();

# System.out.println("年龄："+keyAsString+" ==> "+bucket.getDocCount());

# }

# Avg ageAvg1 = aggregations.get("ageAvg");

# System.out.println("平均年龄："+ageAvg1.getValue());

# Avg balanceAvg1 = aggregations.get("balanceAvg");

# System.out.println("平均薪资："+balanceAvg1.getValue());

# }

###------java调用--------------------------end--------------------------------

# @Test

# public void indexData() throws IOException {

# IndexRequest indexRequest = new IndexRequest ("users");

# User user = new User();

# user.setUserName("张三");

# user.setAge(20);

# user.setGender("男");

# String jsonString = JSON.toJSONString(user);

# //设置要保存的内容

# indexRequest.source(jsonString, XContentType.JSON);

# //执行创建索引和保存数据

# IndexResponse index = client.index(indexRequest, GulimallElasticSearchConfig.COMMON_OPTIONS);

# System.out.println(index);

# }

###----------------------------------------------------------------

#@Test

# public void searchData() throws IOException {

# GetRequest getRequest = new GetRequest(

# "users",

# "_-2vAHIB0nzmLJLkxKWk");

# GetResponse getResponse = client.get(getRequest, RequestOptions.DEFAULT);

# System.out.println(getResponse);

# String index = getResponse.getIndex();

# System.out.println(index);

# String id = getResponse.getId();

# System.out.println(id);

# if (getResponse.isExists()) {

# long version = getResponse.getVersion();

# System.out.println(version);

# String sourceAsString = getResponse.getSourceAsString();

# System.out.println(sourceAsString);

# Map<String, Object> sourceAsMap = getResponse.getSourceAsMap();

# System.out.println(sourceAsMap);

# byte[] sourceAsBytes = getResponse.getSourceAsBytes();

# } else {

# }

##-------------------------------------------------------

##--------------------------商品上架----mapping-----------------------

# SpuInfoServiceImpl.upSpuForSearch 封装上架数据

#ProductSaveServiceimpl.saveProductAsIndices----上架es操作

#"index": false, 不需要检索的冗余数据，只用于展示

#"doc_values": false

#"type": "nested" --数组的扁平化处理--嵌入式属性

PUT product

{

"mappings": {

"properties": {

"skuId": {

"type": "long"

"spuId": {

"type": "keyword"

"skuTitle":{

"type": "text",

"analyzer": "ik_smart"

"skuImg":{

"type":"keyword",

"index": false,

"doc_values": false

"attrs":{

"type": "nested",

"properties": {

"attrId":{

"type":"long"

"attrName":{

"type":"keyword",

"index":false,

"doc_values":false

"attrValue":{

"type":"keyword"

}

#查看映射--表结构

GET /product/_mapping

wang_zhij

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
elasticsearch笔记

elasticsearch使用
复制链接

扫一扫

elasticsearch笔记

“相关推荐”对你有帮助么？