Elasticsearch笔记

docker 安装

基本操作

  • 查看集群概况:http://[ip]:9200/_cat/nodes?v
  • 查看所有索引:http://[ip]:9200/_cat/indices
  • 指定索引setting:http://[ip]:9200/[indices_name]/_settings
  • 指定索引mapping:http://[ip]:9200/[indices_name]/_mapping
  • 指定索引数据:http://[ip]:9200/[indices_name]/_search

查询

  • 基本查询
# match
{
	"query": {
		"bool": {
			"must": [
				{
					"match": {
						"code": "000300"
					}
				},
				{
					"match": {
						"date": "2018-01-08"
					}
				}
			]
		}
	}
}

# term
{
	"query": {
		"bool": {
			"filter": [
				{
					"term": {
						"code": "000300"
					}
				},
				{
					"term": {
						"date": "2018-01-08"
					}
				}
			]
		}
	}
}
  • 多字段匹配
# match_phrase 短语匹配:查询串分词后的所有词项都必须出现且顺序一致,slop 为允许的词项位置偏移量(并不限于中文,也不等同于严格的'==')
{
  "query": {
    "match_phrase": {
        "content" : {
            "query" : "我的宝马多少马力",
            "slop" : 1
        }
    }
  }
}

# multi_match 多个字段进行匹配
{
  "query": {
    "multi_match": {
        "query" : "我的宝马多少马力",
        "fields" : ["title", "content"]
    }
  }
}
  • 过滤
{
	"query": {
		"bool": {
			"filter": [{
				"range": {
					"date": {
						"gt": "2018-01-01"
					}
				}
			},
			{
				"term": {
					"code": "000300"
				}
			}
			]
		}
	}
}
  • 聚合运算
#  SQL: where date = and data_source = group by province
body = {
            "size": 0,
            "query": {
                "bool": {
                    "filter": [{
                            'term': {
                                "date": date_time
                                }
                        },
                        {
                            'term': {
                                "data_source": source
                                }
                        }]
                }
            },

            "aggs": {
                "province_count": {
                    "terms": {"field": "province.keyword"}
                }
            }

        }

#  按 province group by count。注意 "size": 0
body = {
            "size": 0,
            "aggs": {
                "province_count": {
                    "terms": {"field": "province.keyword"}
                }
            }
        }

#   count group by  where province = province 
body = {
            "size": 0,
            "query" : {
                "match_phrase" : {
                    "province" : province
                    }
                },

            "aggs": {
                "city_count": {
                    "terms": {"field": "city.keyword"}
                    }
                }
        }
        
# count group by  where province = province and city = city
body = {
            "size": 0,
            "query": {
                "bool": {
                    "must":[{
                        "match_phrase": {
                            "province": province
                            }
                    },
                        {
                            "match_phrase": {
                                "city": city
                            }
                        }]
                }
            },

            "aggs": {
                "area_count": {
                    "terms": {"field": "area.keyword"}
                }
            }
        }

# group by 2个变量
body = {
            "size": 0,
            "aggs": {
                "province_count": {
                    "terms": {"field": "province.keyword"},

                    "aggs": {
                        "type_count": {
                            "terms": {"field": "community_type.keyword"}
                        }
                    }

                }
            }
        }

# group by and where
body = {
            "size": 0,
            "query": {
                "bool": {
                    "must":[{
                        "match_phrase": {
                            "province": province
                            }
                    },
                        {
                            "match_phrase": {
                                "city": city
                            }
                        }]
                }
            },

            "aggs": {
                "area_count": {
                    "terms": {"field": "area.keyword"},

                    "aggs": {
                        "type_count": {
                            "terms": {"field": "community_type.keyword"}
                        }
                    }

                }
            }
        }

代码交互

  • shell
    • 基本操作
    cat requests
    curl -s -H "Content-Type: application/x-ndjson" -XPOST ip:9200/_bulk --data-binary "@requests"
    curl -XGET 'http://ip:9200/test_index/chen/1?pretty'
    
    • Post数据
    curl  -XPOST  '192.168.0.153:9200/_bulk'   --data-binary  @request
    
  • Python
    • 连接方法
    from elasticsearch import Elasticsearch
    def connect_es(host='ip', port=9200):
      """Create an Elasticsearch client for ``http://<host>:<port>``.

      Args:
        host: Hostname or IP address of the node. The default 'ip' is the
          placeholder used in these notes and must be replaced with a real
          address before use.
        port: HTTP port of the node (default 9200).

      Returns:
        An ``Elasticsearch`` client built from the single node URL.
      """
      url = 'http://{}:{}'.format(host, port)
      return Elasticsearch([url])
    
    • Post数据
    # 逐笔上传
    def post_data(es_index, es_id, data):
      """Upload a single document with the client's ``create`` call.

      The index name is also passed as the ``doc_type``.

      Args:
        es_index: Target index name (reused as the document type).
        es_id: Document id.
        data: Document body.

      Returns:
        ``('success', 201)`` when the response's ``'result'`` field equals
        ``'created'``; otherwise ``('put fail', 400)``.
      """
      client = connect_es()
      response = client.create(index=es_index, doc_type=es_index, id=es_id, body=data)
      was_created = response['result'] == 'created'
      return ('success', 201) if was_created else ('put fail', 400)
    
    # 批量上传
    def test(es_index, es_id, data):
      """Upload one document through ``helpers.bulk``.

      A single bulk action is built from the arguments; the index name is
      also used as the ``_type``. The result of ``helpers.bulk`` is not
      returned.

      Args:
        es_index: Target index name (reused as the document type).
        es_id: Document id.
        data: Document source.
      """
      from elasticsearch import helpers

      client = connect_es()
      actions = [{
          "_index": es_index,
          "_type": es_index,
          "_id": es_id,
          "_source": data
      }]
      helpers.bulk(client, actions)
    
    • 聚合运算
    # Nested terms aggregation: province -> city -> area.
    body = {
        "size": 0,
        "aggs": {
            "province_count": {
                # Note the doc_count_error_upper_bound (upper bound of the
                # per-term count error) and sum_other_doc_count (documents in
                # buckets that were not returned) fields in the response.
                "terms": {"field": "province.keyword", "size": 1000000},
                "aggs": {
                    # NOTE(review): both nested aggregations are named
                    # "industry_count" although they bucket by city and area.
                    "industry_count": {
                        "terms": {"field": "city.keyword"},
                        "aggs": {
                            "industry_count": {
                                "terms": {"field": "area.keyword"}
                            }
                        }
                    }
                }
            }
        }
    }

    res = es.search(index='spider-fang', doc_type='spider-fang', body=body)
    res_data = res['aggregations']['province_count']['buckets']
    

SQL查询: kibana内转换

  • query
POST /_xpack/sql?format=txt
{
    "query": """
    SELECT province,city,area, community_name 
    FROM "spider-fang"
    where province='福建省' and match(city, '厦门')

    """
}
  • group by
POST /_xpack/sql?format=txt
{
    "query": """
    SELECT province,city,count(1) 
    FROM "spider-fang"
    where province='福建省'
    group by province, city
    
    """
}
  • SQL 转 query
POST /_xpack/sql/translate
{
    "query": """
  SELECT doc.province, doc.city, doc.area, count(1)
    FROM "ali-judicial" 
    where doc.province='河南省'
    group by doc.province, doc.city, doc.area
    """
}

FAQ

  • retrying failed action with response code: 403 FORBIDDEN/12/index read-only /

    • 现象:logstash写es大量报错
    retrying failed action with response code: 403 ({"type"=>"cluster_block_exception", "reason"=>"blocked by: [FORBIDDEN/12/index read-only / allow delete (api)];
    
    • 原因:主要是因为磁盘不够用了。Elasticsearch磁盘占用大于95%时将所有索引置为只读。如果你自己手动删除了数据,Elasticsearch不会给你自动切换回来,不过你可以手动去修改。
    • 解决方法
      • shell/cmd
      curl -XPUT -H "Content-Type: application/json" http://localhost:9200/_all/_settings -d '{"index.blocks.read_only_allow_delete": null}'
      
      • kibana
      PUT /_all/_settings
      {"index.blocks.read_only_allow_delete": null}
      
  • 中文字段 无法filter(filter为null的问题)

    • 现象:es中,中文字段,match时,非精确匹配,出现很多数据。 使用filter term过滤时,返回结果为null
    • 解决方法:将match 替换为 match_phrase;若需要精确过滤,可对 [field].keyword 子字段使用 term(text 字段会被分词,term 无法匹配整串原文)
  • Fielddata is disabled on text fields by default. Set fielddata=true

    • 现象:
    Fielddata is disabled on text fields by default. Set fielddata=true
    "type": "illegal_argument_exception",
            "reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [region] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory."
    
    • 解决方法:
      • set index/_mapping/type_name region to "fielddata": true
      {
       "properties": {
        "region":{
         "type": "text",
         "fielddata": true
         }
       }
      }
      
      # local
      {
      "properties": {
        "code":{
         "type": "text",
         "fielddata": true
         }
        }
      }
      
      • 在相关field中用keyword即可
      # 详情请见 聚合运算 --> count groupby
      field="country.keyword"  
      
  • bound or publishing to a non-loopback address, enforcing bootstrap checks

    • 解决方法:yml config
    network.host: 0.0.0.0
    transport.host: localhost
    
  • max virtual memory areas vm.max_map_count [65530] is too low, increase to at least [262144]

    • 本机安装:在文件/etc/sysctl.conf中增加一行vm.max_map_count=262144,然后执行 sysctl -p 使其生效
    • docker安装
      • 宿主机器下操作

         sudo sysctl -w vm.max_map_count=262144
        
        
      • docker修改

        environment:
         - "ES_JAVA_OPTS=-Xms1g -Xmx1g"
        
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值