python操作elasticsearch

最新推荐文章于 2024-05-17 17:39:49 发布

影子浅笑

最新推荐文章于 2024-05-17 17:39:49 发布

阅读量3k

点赞数 3

分类专栏： python

本文链接：https://blog.csdn.net/u014199409/article/details/107719057

版权

python 专栏收录该内容

24 篇文章 0 订阅

订阅专栏

1、python 安装上elasticsearch 模块
2、
2.1 指定连接：

# Connect to a single, explicitly listed node.
# For a secured cluster also pass: http_auth=('elastic', 'changeme')
es = Elasticsearch(hosts=['172.16.153.129:9200'])

2.2 动态连接

# Connect through a list of seed nodes and let the client sniff the cluster.
es = Elasticsearch(
    hosts=['esnode1:port', 'esnode2:port'],
    sniff_on_start=True,             # sniff before performing any operation
    sniff_on_connection_fail=True,   # refresh and reconnect when a node stops responding
    sniffer_timeout=60,              # refresh every 60 seconds
)

2.3 获取集群基本信息

In [39]: es.info()
Out[39]:
{'cluster_name': 'sharkyun',
 'cluster_uuid': 'rIt2U-unRuG0hJBt6BXxqw',
 'name': 'master',
 'tagline': 'You Know, for Search',
 'version': {'build_date': '2017-10-06T20:33:39.012Z',
  'build_hash': '1a2f265',
  'build_snapshot': False,
  'lucene_version': '6.6.1',
  'number': '5.6.3'}}

2.4 获取状态信息

In [41]: es.cluster.health()
Out[41]:
{'active_primary_shards': 6,
 'active_shards': 6,
 'active_shards_percent_as_number': 50.0,
 'cluster_name': 'sharkyun',
 'delayed_unassigned_shards': 0,
 'initializing_shards': 0,
 'number_of_data_nodes': 1,
 'number_of_in_flight_fetch': 0,
 'number_of_nodes': 1,
 'number_of_pending_tasks': 0,
 'relocating_shards': 0,
 'status': 'yellow',
 'task_max_waiting_in_queue_millis': 0,
 'timed_out': False,
 'unassigned_shards': 6}

2.5 查询请求

es = Elasticsearch(
    ['172.16.153.129:9200']
)

# Paging window consumed by "from" / "size" in the request body below.
# Example values; adjust them to the page you want.
start = 0    # zero-based index of the first matching hit to return
size = 10    # number of hits to return

response = es.search(
    index="logstash-2017.11.14",  # index name
    body={                        # request body
        "query": {                # the query clause goes under "query"
            # bool query: combines the must / must_not clauses below
            "bool": {
                # every clause in this list has to match
                "must": [
                    # match: {<field name>: <value the field has to match>}
                    {"match": {"TransId": '06100021650016153'}},
                    {"match": {"Ds": '2017-05-06'}},
                    {"match": {"Gy": '2012020235'}},
                ],
                # hits must NOT match this clause: documents whose
                # "message" field matches 'M(' are excluded
                "must_not": {
                    "match": {"message": "M("}
                },
            }
        },
        # order the hits, newest @timestamp first
        "sort": [{"@timestamp": {"order": "desc"}}],
        "from": start,
        "size": size,
    }
)

得到返回的总条数：

# Total number of hits reported by the search result bound to `response` above.
total = response['hits']['total']

循环结果，获取自己想要的数据

# Build a lookup from "Ds|Us" of each hit to a
# "hostname|FileName|FileNum|Messager" summary string.
res_dict = {}
for hit in response['hits']['hits']:
    source = hit['_source']  # the indexed document itself
    log_time = "%s|%s" % (source['Ds'], source['Us'])
    res_dict[log_time] = "%s|%s|%s|%s" % (
        source['beat']['hostname'],
        source['FileName'],
        source['FileNum'],
        source['Messager'],
    )

单一字段查询

es = Elasticsearch(hosts=['172.16.153.129:9200'])

# Build the search step by step: a filter, a query, and an exclusion.
s = (
    Search(using=es, index="logstash-2017.11.14")
    .filter("match", Gy='20160521491')
    .query("match", TransId='06100021650016153')
    .exclude("match", message="M(")
)

response = s.execute()

参数说明：

using  
    指明用哪个已经连接的对象
query  
    接收的是查询体语句
exclude
    接收的是不匹配的字段 就像 must_not
    
filter
    接收的是过滤语句 ，过滤的条件意思是在返回结果中有这些条件的信息       

# Presumably returns the number of documents matching the search `s` built
# earlier — confirm against the elasticsearch-dsl Search.count() docs.
s.count()

常用查询例子（注意：下面用到的 filtered 查询已在 Elasticsearch 5.0 中移除，5.x 及以上版本请改用 bool 查询的 filter 子句）：

{
  "query": {
    "filtered": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      }
    }
  },
  "aggs": {
    "execute_time": {
      "avg": {
        "field": "upstream_time_ms"
      }
    }
  }
}
//当然你也可以直接将过滤器写在aggs里面
{
  "size": 0,
  "aggs": {
    "filtered_aggs": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      },
      "aggs": {
        "execute_time": {
          "avg": {
            "field": "upstream_time_ms"
          }
        }
      }
    }
  }
}

cardinality 基数，比如计算uv
{
  "size": 0,
  "aggs": {
    "filtered_aggs": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      },
      "aggs": {
        "ipv": {
          "cardinality": {
            "field": "ip"
          }
        }
      }
    }
  }
}

percentile_ranks 指定一个范围，有多少数据落在这里
{
  "size": 0,
  "query": {
    "filtered": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      }
    }
  },
  "aggs": {
    "execute_time": {
      "percentile_ranks": {
        "field": "upstream_time_ms",
        "values": [
          50,
          160
        ]
      }
    }
  }
}

统计最近15分钟，不同的链接请求时间大小
{
  "size": 0,
  "query": {
    "filtered": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      }
    }
  },
  "aggs": {
    "execute_time": {
      "terms": {
        "field": "uri"
      },
      "aggs": {
        "avg_time": {
          "avg": {
            "field": "upstream_time_ms"
          }
        }
      }
    }
  }
}

找出url响应最慢的前2名
{
  "size": 0,
  "query": {
    "filtered": {
      "filter": {
        "range": {
          "@timestamp": {
            "gt": "now-15m",
            "lt": "now"
          }
        }
      }
    }
  },
  "aggs": {
    "execute_time": {
      "terms": {
        "size": 2,
        "field": "uri",
        "order": {
          "avg_time": "desc"
        }
      },
      "aggs": {
        "avg_time": {
          "avg": {
            "field": "upstream_time_ms"
          }
        }
      }
    }
  }
}

pv 分时统计图（每小时一统计）
{
  "size":0,
  "fields":false,
  "aggs": {
    "execute_time": {
      "date_histogram": {
        "field": "@timestamp",
        "interval": "1h"
      }
    }
  }
}

term 和terms
# term: exact match on a single value
body = {
    "query": {
        "term": {
            "name": "python"
        }
    }
}
# Find all documents whose name is "python".
es.search(index="my_index", doc_type="test_type", body=body)

# terms: exact match on any of several values
body = {
    "query": {
        "terms": {
            "name": ["python", "android"]
        }
    }
}
# Find all documents whose name is "python" or "android".
es.search(index="my_index", doc_type="test_type", body=body)

match与multi_match
# match:匹配name包含python关键字的数据
# Full-text match on the "name" field for the keyword "python".
body = {"query": {"match": {"name": "python"}}}
# 查询name包含python关键字的数据
es.search(index="my_index",doc_type="test_type",body=body)


 multi_match:在name和addr里匹配包含深圳关键字的数据
 body = {
    "query":{
        "multi_match":{
            "query":"深圳",
            "fields":["name","addr"]
        }
    }
}
# 查询name和addr包含"深圳"关键字的数据
es.search(index="my_index",doc_type="test_type",body=body)

# ids: fetch documents by their ids
body = {
    "query": {
        "ids": {
            "type": "test_type",
            "values": ["1", "2"]
        }
    }
}
# 搜索出id为1或2的所有数据
es.search(index="my_index",doc_type="test_type",body=body)


bool有3类查询关系，must(都满足),should(其中一个满足),must_not(都不满足)

# bool / must: every clause listed under "must" has to match.
body = {
    "query": {
        "bool": {
            "must": [
                {"term": {"name": "python"}},
                {"term": {"age": 18}},
            ]
        }
    }
}
# 获取name="python"并且age=18的所有数据
es.search(index="my_index",doc_type="test_type",body=body)


# Range query on "age": gte is the lower bound (>= 18), lte the upper (<= 30).
body = {
    "query": {
        "range": {
            "age": {"gte": 18, "lte": 30}
        }
    }
}
# 查询18<=age<=30的所有数据
es.search(index="my_index",doc_type="test_type",body=body)


# Prefix query: "name" values that begin with "p".
body = {"query": {"prefix": {"name": "p"}}}
# 查询name以"p"为前缀的所有数据
es.search(index="my_index",doc_type="test_type",body=body)


# Wildcard query on "name" with the pattern "*id".
body = {"query": {"wildcard": {"name": "*id"}}}
# 查询name以id为后缀的所有数据
es.search(index="my_index",doc_type="test_type",body=body)


# Fetch every document, ordered by the "age" field.
body = {
    "query": {
        "match_all": {}
    },
    "sort": {
        "age": {                 # sort on the age field
            "order": "asc"       # "asc" = ascending, "desc" = descending
        }
    }
}



body = {
    "query": {
        "match_all": {}
    },
    "aggs": {                       # aggregations
        "min_age": {                # key the minimum is reported under
            "min": {                # minimum-value aggregation
                "field": "age"      # take the minimum of "age"
            }
        }
    }
}
# Search all documents and get the smallest age.
es.search(index="my_index", doc_type="test_type", body=body)


body = {
    "query": {
        "match_all": {}
    },
    "aggs": {                       # aggregations
        "max_age": {                # key the maximum is reported under
            "max": {                # maximum-value aggregation
                "field": "age"      # take the maximum of "age"
            }
        }
    }
}
# Search all documents and get the largest age.
es.search(index="my_index", doc_type="test_type", body=body)



body = {
    "query": {
        "match_all": {}
    },
    "aggs": {                       # aggregations
        "sum_age": {                # key the sum is reported under
            "sum": {                # sum aggregation
                "field": "age"      # add up every "age" value
            }
        }
    }
}
# Search all documents and get the sum of all ages.
es.search(index="my_index", doc_type="test_type", body=body)



body = {
    "query": {
        "match_all": {}
    },
    "aggs": {                       # aggregations
        "avg_age": {                # key the average is reported under
            "avg": {                # average aggregation
                "field": "age"      # average of every "age" value
            }
        }
    }
}
# 搜索所有数据，获取所有age的平均值
es.search(index="my_index",doc_type="test_type",body=body)

相关知识来自：https://www.jianshu.com/p/462007422e65

影子浅笑

关注

3
点赞
踩
22

收藏

觉得还不错? 一键收藏
0
评论
python操作elasticsearch

1、python 安装上elasticsearch 模块2、2.1 指定连接：es = Elasticsearch( ['172.16.153.129:9200'], # 认证信息 # http_auth=('elastic', 'changeme'))2.2 动态连接es = Elasticsearch( ['esnode1:port', 'esnode2:port'], # 在做任何操作之前，先进行嗅探 sniff_on_start=Tru
复制链接

扫一扫