ElasticSearch系列08：python操作Elasticsearch

最新推荐文章于 2024-07-17 10:59:49 发布

TURING.DT

最新推荐文章于 2024-07-17 10:59:49 发布

阅读量1.1k

点赞数

分类专栏： ElasticSearch

本文链接：https://blog.csdn.net/levy_cui/article/details/70808559

版权

ElasticSearch 专栏收录该内容

10 篇文章 1 订阅

订阅专栏

Elasticsearch客户端列表：https://www.elastic.co/guide/en/elasticsearch/client/index.html
Python API：https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html
参考文档：http://elasticsearch-py.readthedocs.io/en/master/index.html

下面介绍一个python使用elasticsearch的例子

安装python elasticsearch包
先安装pip，安装方法参见：https://pip.pypa.io/en/latest/installing/
#python get-pip.py
#pip install elasticsearch==2.4.1 指定版本

实例一：简单操作

from datetime import datetime
from elasticsearch import Elasticsearch

# Connect to Elasticsearch; the default port is 9200.
es = Elasticsearch()

# Create the index "my-index". If it already exists the server answers with
# HTTP 400; ignore=400 makes the client return that response instead of
# raising, so the script can be re-run. The index may be created here, or
# implicitly later when the first document is inserted.
es.indices.create(index='my-index', ignore=400)
# Sample response: {u'acknowledged': True}

# Insert a document (two further inserts are omitted here; they are used later).
# The id is written as 1, not 01: a leading-zero integer literal is a syntax
# error in Python 3.
es.index(index="my-index", doc_type="test-type", id=1, body={"any": "data01", "timestamp": datetime.now()})
# Sample response:
# {u'_type': u'test-type', u'created': True, u'_shards': {u'successful': 1, u'failed': 0, u'total': 2}, u'_version': 1, u'_index': u'my-index', u'_id': u'1'}
# Alternatively, let the insert itself create the index ("test-index").
es.index(index="test-index", doc_type="test-type", id=42, body={"any": "data", "timestamp": datetime.now()})

# There are two ways to read data back: get and search.
# get: fetch a single document by id.
res = es.get(index="my-index", doc_type="test-type", id=1)
print(res)
# {u'_type': u'test-type', u'_source': {u'timestamp': u'2016-01-20T10:53:36.997000', u'any': u'data01'}, u'_index': u'my-index', u'_version': 1, u'found': True, u'_id': u'1'}
print(res['_source'])
# {u'timestamp': u'2016-01-20T10:53:36.997000', u'any': u'data01'}

# search: run a query; match_all returns every document in the index.
res = es.search(index="test-index", body={"query": {"match_all": {}}})
print(res)
# NOTE(review): the sample output below lists hits whose _index is my-index, so
# it was apparently captured from a search against that index -- confirm.
#{u'hits':
#    {
#    u'hits': [
#        {u'_score': 1.0, u'_type': u'test-type', u'_id': u'2', u'_source': {u'timestamp': u'2016-01-20T10:53:58.562000', u'any': u'data02'}, u'_index': u'my-index'},
#        {u'_score': 1.0, u'_type': u'test-type', u'_id': u'1', u'_source': {u'timestamp': u'2016-01-20T10:53:36.997000', u'any': u'data01'}, u'_index': u'my-index'},
#        {u'_score': 1.0, u'_type': u'test-type', u'_id': u'3', u'_source': {u'timestamp': u'2016-01-20T11:09:19.403000', u'any': u'data033'}, u'_index': u'my-index'}
#    ],
#    u'total': 5,
#    u'max_score': 1.0
#    },
#u'_shards': {u'successful': 5, u'failed': 0, u'total':5},
#u'took': 1,
#u'timed_out': False
#}
# Each hit carries the stored document under "_source".
for hit in res['hits']['hits']:
    print(hit["_source"])

# Fetch every document whose "any" field matches "data".
res = es.search(index="test-index", body={'query': {'match': {'any': 'data'}}})
print(res)

至于body里面参数的设置，具体请看：https://www.elastic.co/guide/en/elasticsearch/reference/current/query-filter-context.html

实例二、MongoDB与ES操作
由于Elasticsearch索引的文档是JSON形式，而MongoDB存储的文档也是类JSON（BSON）形式，因此这里选择从MongoDB中导出数据并添加到Elasticsearch中。

使用MongoDB的Python API时，需要先安装pymongo，命令：pip install pymongo

import traceback
from pymongo import MongoClient
from elasticsearch import Elasticsearch

# Connect to MongoDB and select the "blog" database.
_db = MongoClient('mongodb://127.0.0.1:27017')['blog']

# Connect to Elasticsearch.
_es = Elasticsearch()

# Mappings used when the index is first created: one entry per document type
# ("user" and "blogpost"), each listing its fields and their types.
_index_mappings = {
  "mappings": {
    "user": {
      "properties": {
        "title":    { "type": "text"  },
        "name":     { "type": "text"  },
        "age":      { "type": "integer" }
      }
    },
    "blogpost": {
      "properties": {
        "title":    { "type": "text"  },
        "body":     { "type": "text"  },
        "user_id":  {
          "type":   "keyword"
        },
        "created":  {
          "type":   "date"
        }
      }
    }
  }
}

# Create the index only if it does not exist yet.
if not _es.indices.exists(index='blog_index'):
  _es.indices.create(index='blog_index', body=_index_mappings)

# Read the users from MongoDB. Elasticsearch generates its own _id for each
# document, so MongoDB's _id is excluded from the returned documents.
# (The connection is bound to _db above; the bare name db is undefined.)
user_cursor = _db.user.find({}, projection={'_id': False})
user_docs = list(user_cursor)

# Number of documents indexed so far.
processed = 0
# Add each fetched document to Elasticsearch.
for _doc in user_docs:
  try:
    # refresh=True makes the document searchable immediately; with the
    # default (false) the searches below might return no results.
    _es.index(index='blog_index', doc_type='user', refresh=True, body=_doc)
    processed += 1
    print('Processed: ' + str(processed), flush=True)
  except Exception:
    # Best effort: report the failure and continue with the next document.
    # Exception (not a bare except) lets Ctrl-C and SystemExit stop the run.
    traceback.print_exc()

# Query all records.
print('Search all...',  flush=True)
_query_all = {
  'query': {
    'match_all': {}
  }
}
_searched = _es.search(index='blog_index', doc_type='user', body=_query_all)
print(_searched, flush=True)

# Print the stored document of every hit.
for hit in _searched['hits']['hits']:
  print(hit['_source'], flush=True)

# Query the records whose name contains "jerry".
print('Search name contains jerry.', flush=True)
_query_name_contains = {
  'query': {
    'match': {
      'name': 'jerry'
    }
  }
}
_searched = _es.search(index='blog_index', doc_type='user', body=_query_name_contains)
print(_searched, flush=True)

运行上面的文件(elasticsearch_trial.py)：
python elasticsearch_trial.py

可以得到下面的输出结果：
Processed: 1
Processed: 2
Processed: 3
Search all...
{'took': 1, 'timed_out': False, '_shards': {'total': 5, 'successful': 5, 'failed': 0}, 'hits': {'total': 3, 'max_score': 1.0, 'hits': [{'_index': 'blog_index', '_type': 'user', '_id': 'AVn4TrrVXvwnWPWhxu5q', '_score': 1.0, '_source': {'title': 'Manager', 'name': 'Trump Heat', 'age': 67}}, {'_index': 'blog_index', '_type': 'user', '_id': 'AVn4TrscXvwnWPWhxu5s', '_score': 1.0, '_source': {'title': 'Engineer', 'name': 'Tommy Hsu', 'age': 32}}, {'_index': 'blog_index', '_type': 'user', '_id': 'AVn4Trr2XvwnWPWhxu5r', '_score': 1.0, '_source': {'title': 'President', 'name': 'Jerry Jim', 'age': 21}}]}}
{'title': 'Manager', 'name': 'Trump Heat', 'age': 67}
{'title': 'Engineer', 'name': 'Tommy Hsu', 'age': 32}
{'title': 'President', 'name': 'Jerry Jim', 'age': 21}
Search name contains jerry.
{'took': 3, 'timed_out': False, '_shards': {'total': 5, 'successful': 5, 'failed': 0}, 'hits': {'total': 1, 'max_score': 0.25811607, 'hits': [{'_index': 'blog_index', '_type': 'user', '_id': 'AVn4Trr2XvwnWPWhxu5r', '_score': 0.25811607, '_source': {'title': 'President', 'name': 'Jerry Jim', 'age': 21}}]}}

参考：

http://www.cnblogs.com/yxpblog/p/5141738.html

http://blog.csdn.net/mydeman/article/details/54808267

https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html