Elasticsearch入门

俺还能再抢救一下

于 2022-08-21 20:56:46 发布

阅读量1.1k

点赞数

文章标签： elasticsearch 搜索引擎大数据

本文链接：https://blog.csdn.net/weixin_46413513/article/details/126455304

版权

Elasticsearch入门

source

只获取 source 字段

GET student/_source/1

{
  "num": "1301",
  "first_name": "hua",
  "last_name": "li",
  "age": 20,
  "sex": "male",
  "score": [
    {
      "name": "art",
      "degree": "A"
    },
    {
      "name": "music",
      "degree": "B"
    },
    {
      "name": "math",
      "degree": "D"
    }
  ],
  "about": "I like to build cabinets"
}

只获取 source 内的部分字段

GET student/_doc/1?_source=age

{
  "_index": "student",
  "_id": "1",
  "_version": 1,
  "_seq_no": 0,
  "_primary_term": 1,
  "found": true,
  "_source": {
    "age": 20
  }
}

mget

同时获取多个文档

GET _mget
{
  "docs":[
    {
      "_index":"student",
      "_id":1
    },
    {
      "_index":"information",
      "_id":"FXHVNYIBKevSsIoItFM5"
    }
    ]
}

{
  "docs": [
    {
      "_index": "student",
      "_id": "1",
      "_version": 1,
      "_seq_no": 0,
      "_primary_term": 1,
      "found": true,
      "_source": {
        "num": "1301",
        "first_name": "hua",
        "last_name": "li",
        "age": 20,
        "sex": "male",
        "score": [
          {
            "name": "art",
            "degree": "A"
          },
          {
            "name": "music",
            "degree": "B"
          },
          {
            "name": "math",
            "degree": "D"
          }
        ],
        "about": "I like to build cabinets"
      }
    },
    {
      "_index": "information",
      "_id": "FXHVNYIBKevSsIoItFM5",
      "_version": 1,
      "_seq_no": 2,
      "_primary_term": 1,
      "found": true,
      "_source": {
        "first_name": "hua",
        "last_name": "li",
        "age": 20,
        "gender": "male",
        "interests": [
          "sport",
          "singing"
        ],
        "about": "I like to build cabinets"
      }
    }
  ]
}

同时获取多个文档,并只获取部分字段

GET _mget
{
  "docs":[
    {
      "_index":"student",
      "_id":1,
      "_source":["last_name","age"]
    },
    {
      "_index":"information",
      "_id":"FXHVNYIBKevSsIoItFM5",
      "_source":["last_name","age"]
    }
    ]
}

{
  "docs": [
    {
      "_index": "student",
      "_id": "1",
      "_version": 1,
      "_seq_no": 0,
      "_primary_term": 1,
      "found": true,
      "_source": {
        "last_name": "li",
        "age": 20
      }
    },
    {
      "_index": "information",
      "_id": "FXHVNYIBKevSsIoItFM5",
      "_version": 1,
      "_seq_no": 2,
      "_primary_term": 1,
      "found": true,
      "_source": {
        "last_name": "li",
        "age": 20
      }
    }
  ]
}

如果获取的是同个index的文档，可以简写

GET student/_mget
{
  "ids":["1","2"]
}

PUT和POST

PUT修改已有文档

PUT 是整体替换：如果没有把文档的每个字段都写出来，没写的字段会直接丢失

PUT student/_doc/1
{
  "name":"test"
}

{
  "_index": "student",
  "_id": "1",
  "_version": 2,
  "_seq_no": 3,
  "_primary_term": 4,
  "found": true,
  "_source": {
    "name": "test"
  }
}

将id=1的数据恢复

POST修改已有文档

这种写法跟上面的PUT一样，会导致那些没写的直接不见

POST student/_doc/1
{
  "name":"test"
}

{
  "_index": "student",
  "_id": "1",
  "_version": 4,
  "_seq_no": 5,
  "_primary_term": 4,
  "found": true,
  "_source": {
    "name": "test"
  }
}

使用 _update 写法才不会丢失没写出来的字段

POST student/_update/1
{
  "doc":{
    "name":"test"
  }
}

{
  "_index": "student",
  "_id": "1",
  "_version": 6,
  "_seq_no": 7,
  "_primary_term": 4,
  "found": true,
  "_source": {
    "num": "1301",
    "first_name": "hua",
    "last_name": "li",
    "age": 20,
    "sex": "male",
    "score": [
      {
        "name": "art",
        "degree": "A"
      },
      {
        "name": "music",
        "degree": "B"
      },
      {
        "name": "math",
        "degree": "D"
      }
    ],
    "about": "I like to build cabinets",
    "name": "test"
  }
}

script

直接修改（需要知道id）

POST student/_update/4
{
  "script":{
    "source":"ctx._source.score[2].degree=params.degree",
    "lang":"painless",
    "params":{
      "degree":"C"
    }
  }
}

id 如果不存在会报错，可以用upsert属性，在id不存在的时候新建一个文档

POST student/_update/6
{
  "script":{
    "source":"ctx._source.score[2].degree=params.degree",
    "lang":"painless",
    "params":{
      "degree":"C"
    }
  },
  "upsert": {
    "degree":"D"
  }
}

先查询，后修改（不需要知道id）

POST student/_update_by_query
{
  
  "query": {
    "match": {
      "first_name": "xiaoming"
    }
  },
  "script": {
    "source": "ctx._source.age = params.age;ctx._source.score[0].name = params.name;",
    "lang": "painless",
    "params": {
      "age":30,
      "name":"artt"
    }
  }
}

解释

script 内的 source 是赋值语句，等号右边的参数来自 params，ctx. 是固定前缀

此外，在 painless 语言中，key为中文时，是有问题的(重点是key，value中文是没事的)

新建一个中文文档

{
  "_index": "student",
  "_id": "5",
  "_version": 1,
  "_seq_no": 10,
  "_primary_term": 4,
  "found": true,
  "_source": {
    "姓名": "小明",
    "性别": "男",
    "年龄": 41
  }
}

直接改会报错

POST student/_update_by_query
{
  
  "query": {
    "match": {
      "姓名": "小明"
    }
  },
  "script": {
    "source": "ctx._source.年龄 = params.年龄",
    "lang": "painless",
    "params": {
        "年龄":50
    }
  }
}

采用中括号加引号才可以 [\"key\"]

POST student/_update_by_query
{
  
  "query": {
    "match": {
      "姓名": "小明"
    }
  },
  "script": {
    "source": "ctx._source[\"年龄\"] = params[\"年龄\"]",
    "lang": "painless",
    "params": {
        "年龄":50
    }
  }
}

删除一个文档——script里判断语句

POST student/_update/5
{
  "script": {
    "source": """
    if (ctx._source.uid != 1) {
      ctx.op = 'delete'
    } else {
      ctx.op = "none"
    }
    """
  }
}

{
  "_index": "student",
  "_id": "5",
  "_version": 3,
  "result": "deleted",
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "_seq_no": 13,
  "_primary_term": 4
}

对文档添加一个字段

POST student/_update/1
{
  "script": {
    "source": "ctx._source.newkey=\"new\"",
      "lang": "painless"
  }
}

{
  "_index": "student",
  "_id": "1",
  "_version": 9,
  "result": "updated",
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "_seq_no": 16,
  "_primary_term": 4
}

对文档删除一个字段

POST student/_update/1
{
  "script": {
    "source": "ctx._source.remove(\"newkey\")"
  }
}

字段即使删除，mapping还在
get student/_mapping

UPSERT 一个文档

POST student/_update/5
{
  "doc": {
    "author": "Albert Paro",
    "title": "Elasticsearch 5.0 Cookbook",
    "description": "Elasticsearch 5.0 Cookbook Third Edition",
    "price": "54.99"
  },
  "doc_as_upsert": true
}

{
  "_index": "student",
  "_id": "5",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "_seq_no": 18,
  "_primary_term": 4
}

检查一个文档是否存在

HEAD student/_doc/5
返回 200 - OK

批处理命令

index

POST _bulk
{ "index" : { "_index" : "twitter", "_id": 1} }
{"user":"双榆树-张三","message":"今儿天气不错啊，出去转转去","uid":2,"age":20,"city":"北京","province":"北京","country":"中国","address":"中国北京市海淀区","location":{"lat":"39.970718","lon":"116.325747"}}
{ "index" : { "_index" : "twitter", "_id": 2 }}
{"user":"东城区-老刘","message":"出发，下一站云南！","uid":3,"age":30,"city":"北京","province":"北京","country":"中国","address":"中国北京市东城区台基厂三条3号","location":{"lat":"39.904313","lon":"116.412754"}}
{ "index" : { "_index" : "twitter", "_id": 3} }
{"user":"东城区-李四","message":"happy birthday!","uid":4,"age":30,"city":"北京","province":"北京","country":"中国","address":"中国北京市东城区","location":{"lat":"39.893801","lon":"116.408986"}}
{ "index" : { "_index" : "twitter", "_id": 4} }
{"user":"朝阳区-老贾","message":"123,gogogo","uid":5,"age":35,"city":"北京","province":"北京","country":"中国","address":"中国北京市朝阳区建国门","location":{"lat":"39.718256","lon":"116.367910"}}
{ "index" : { "_index" : "twitter", "_id": 5} }
{"user":"朝阳区-老王","message":"Happy BirthDay My Friend!","uid":6,"age":50,"city":"北京","province":"北京","country":"中国","address":"中国北京市朝阳区国贸","location":{"lat":"39.918256","lon":"116.467910"}}
{ "index" : { "_index" : "twitter", "_id": 6} }
{"user":"虹桥-老吴","message":"好友来了都今天我生日，好友来了,什么 birthday happy 就成!","uid":7,"age":90,"city":"上海","province":"上海","country":"中国","address":"中国上海市闵行区","location":{"lat":"31.175927","lon":"121.383328"}}

create

POST _bulk
{ "create" : { "_index" : "twitter", "_id": 1} }
{"user":"双榆树-张三","message":"今儿天气不错啊，出去转转去","uid":2,"age":20,"city":"北京","province":"北京","country":"中国","address":"中国北京市海淀区","location":{"lat":"39.970718","lon":"116.325747"}}

delete

POST _bulk
{"delete":{"_index":"twitter","_id":1}}

一定要写_id，不写会报错

POST _bulk
{"delete":{"_index":"twitter"}}

{
  "error": {
    "root_cause": [
      {
        "type": "action_request_validation_exception",
        "reason": "Validation Failed: 1: id is missing;"
      }
    ],
    "type": "action_request_validation_exception",
    "reason": "Validation Failed: 1: id is missing;"
  },
  "status": 400
}

update

POST _bulk
{"update":{"_index":"twitter","_id":2}}
{"doc":{"location":{"lat":"20.0111"}}}

查询当前已有的文档

POST student/_search

查询有多少条文档

GET student/_count

{
  "count": 6,
  "_shards": {
    "total": 2,
    "successful": 2,
    "skipped": 0,
    "failed": 0
  }
}

导入json数据

下载测试数据
8.x版本需要把_type字段去除

```bash
curl -s -H "Content-Type: application/x-ndjson" -XPOST localhost:9200/_bulk --data-binary @es.json
```
`GET bank_account/_count`
```json
{
  "count": 1000,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  }
}
```
## 打开/关闭 索引
### 关闭索引
关闭的索引仍会占用大量磁盘空间
`POST twitter/_close`
```json
{
  "acknowledged": true,
  "shards_acknowledged": true,
  "indices": {
    "twitter": {
      "closed": true
    }
  }
}
```
#### 尝试访问
`get twitter/_doc/1`
```json
{
  "error": {
    "root_cause": [
      {
        "type": "index_closed_exception",
        "reason": "closed",
        "index_uuid": "dcfocBk5QoqSycf2G7VEug",
        "index": "twitter"
      }
    ],
    "type": "index_closed_exception",
    "reason": "closed",
    "index_uuid": "dcfocBk5QoqSycf2G7VEug",
    "index": "twitter"
  },
  "status": 400
}
```
### 打开索引
`POST twitter/_open`
```json
{
  "acknowledged": true,
  "shards_acknowledged": true
}
```
## 两类搜索
### query
#### 搜索整个集群文档指定返回个数，默认返回10个
`GET /_search?size=20`等同于`GET _all/_search?size=20`
#### 针对指定索引进行搜索
`GET /twitter,student/_search`
#### 针对所有以 index 开头的索引进行搜索，但是排除 index3 索引
`POST /index*,-index3/_search`
-  _score		表示我们搜索结果的相关度
-   value		表明搜索的结果数量
-   relation	表明上面的value是什么关系（eq  gte ...）
#### 分页
对比不同分页数，返回的结果就知道了
`GET /bank_account/_search?size=1&from=2`

### aggregation
统计分析
```json
GET student/_search
{
  "query": {
    "bool": {
      "should": [
        {"match":
          {"last_name":"li"}
        }
      ]
    }
  },
  "aggregations":{
    "count_score":{
      "terms": {
        "field": "score.degree.keyword"
      }
    }
  }
}
```
```json
{
  "took": 15,
  "timed_out": false,
  "_shards": {
    "total": 2,
    "successful": 2,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 2,
      "relation": "eq"
    },
    "max_score": 0.4700036,
    "hits": [
      {
        "_index": "student",
        "_id": "2",
        "_score": 0.4700036,
        "_source": {
          "score": [
            {
              "name": "哈哈",
              "degree": "C"
            },
            {
              "name": "music",
              "degree": "B"
            },
            {
              "name": "math",
              "degree": "A"
            }
          ],
          "num": "1302",
          "sex": "male",
          "about": "I love to go rock climbing",
          "last_name": "li",
          "first_name": "xiaoming",
          "age": 30
        }
      },
      {
        "_index": "student",
        "_id": "1",
        "_score": 0.4700036,
        "_source": {
          "num": "1301",
          "first_name": "hua",
          "last_name": "li",
          "age": 20,
          "sex": "male",
          "score": [
            {
              "name": "art",
              "degree": "A"
            },
            {
              "name": "music",
              "degree": "B"
            },
            {
              "name": "math",
              "degree": "D"
            }
          ],
          "about": "I like to build cabinets",
          "name": "test",
          "newkey2": "new"
        }
      }
    ]
  },
  "aggregations": {
    "count_score": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "A",
          "doc_count": 2
        },
        {
          "key": "B",
          "doc_count": 2
        },
        {
          "key": "C",
          "doc_count": 1
        },
        {
          "key": "D",
          "doc_count": 1
        }
      ]
    }
  }
}
```
## 过滤
### filter path
`GET /bank_account/_search?filter_path=hits.hits`
可以多个
`GET /bank_account/_search?filter_path=hits.hits._index,hits.hits._source.city`
### source filtering
会显示source的内容
```json
GET /bank_account/_search
{
  "_source": ["address","age"],
  "query": {
    "match_all": {}
  }
}
```
或者用这种写法，返回结果一样
```json
GET /bank_account/_search?size=1
{
  "_source":{
    "includes": ["address","age"]
  },
  "query": {
    "match_all": {}
  }
}
```
```json
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1000,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "bank_account",
        "_id": "VVccOoIB6d515Zxt56V2",
        "_score": 1,
        "_source": {
          "address": "880 Holmes Lane",
          "age": 32
        }
      }
    ]
  }
}
```


上面的 _source 会返回很多可能不想要的字段，可以搭配 fields 来指定返回的字段
```json
GET /bank_account/_search?size=1
{
  "_source": false,
  "fields": [
    "firstname","lastname"
  ], 
  "query": {
    "match_all": {}
  }
}
```
```json
{
  "took": 26,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1000,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "bank_account",
        "_id": "VVccOoIB6d515Zxt56V2",
        "_score": 1,
        "fields": {
          "firstname": [
            "Amber"
          ],
          "lastname": [
            "Duke"
          ]
        }
      }
    ]
  }
}
```

_source 单独使用（不搭配 fields）时，设置为 false 可以让整个 _source 不显示
```json
GET /bank_account/_search?size=1
{
  "_source": false,
  "query": {
    "match_all": {}
  }
}
```
```json
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1000,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "bank_account",
        "_id": "VVccOoIB6d515Zxt56V2",
        "_score": 1
      }
    ]
  }
}
```

_source可搭配通配符、includes、excludes使用
```json
GET /bank_account/_search?size=1
{
  "_source": {
    "excludes": "firstname",
    "includes": "*name"
    },
  "query": {
    "match_all": {}
  }
}
```
```json
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 1000,
      "relation": "eq"
    },
    "max_score": 1,
    "hits": [
      {
        "_index": "bank_account",
        "_id": "VVccOoIB6d515Zxt56V2",
        "_score": 1,
        "_source": {
          "lastname": "Duke"
        }
      }
    ]
  }
}
```
把 _source 设置为 [] 或 {} 时，会显示所有字段，而不是不显示任何字段
```json
GET /bank_account/_search?size=1
{
  "_source":{},
  "query": {
    "match_all": {}
  }
}
```

## Script fields
多了一个自建的字段
script 里的 doc['age'] 写在 JSON 字符串中，用单引号最方便；如果用双引号，必须转义为 doc[\"age\"]
```json
GET /bank_account/_search?size=1
{
  "query": {
    "match_all": {}
  },
  "script_fields": {
    "life": {
      "script": {
        "source": "100-doc['age'].value"
      }
    }
  }
}
```
## 修改 settings
### 查看settings
`get bank_account/_settings`
number_of_shards 不能修改
number_of_replicas 可以修改

## 修改索引的 mapping
`get bank_account/_mapping`