es创建索引
# coding=utf-8
from __future__ import print_function
from elasticsearch import Elasticsearch
# NOTE(review): credentials are embedded in the host URL; prefer loading them
# from configuration or the environment instead of committing them.
ES_HOSTS = ['elastic:xxxx@es-cn-xxx.elasticsearch.aliyuncs.com:9200']
INDEX_NAME = 'goodlook'
DOC_TYPE = 'post'

# Post-level attributes and per-item attributes. Every one of them is a text
# field analysed with the IK analyzers (see _ik_text).
POST_TEXT_FIELDS = (
    "star", "gender", "age", "scene", "season", "style", "color", "content",
)
ITEM_TEXT_FIELDS = ("category", "color", "attributes")


def _ik_text():
    """Return a fresh mapping for a text field analysed with IK.

    ``ik_max_word`` is used at index time and ``ik_smart`` at search time.
    A new dict is built on every call so fields never share one object.
    """
    return {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart",
    }


es = Elasticsearch(hosts=ES_HOSTS)

# An existing index has to be dropped before it can be created again.
# ignore=[404] keeps the very first run (index not there yet) from raising
# NotFoundError; the 404 body is returned instead.
res = es.indices.delete(index=INDEX_NAME, ignore=[404])

post_properties = {name: _ik_text() for name in POST_TEXT_FIELDS}
# "items" is a nested object: each item is indexed as its own hidden document,
# so its fields must be queried through a "nested" query.
post_properties["items"] = {
    "type": "nested",
    "properties": {name: _ik_text() for name in ITEM_TEXT_FIELDS},
}

request_body = {
    "mappings": {
        DOC_TYPE: {
            "properties": post_properties,
        }
    }
}

# ignore=400 returns the error body (e.g. a mapping parse failure) instead of
# raising, so the outcome is always visible in the printed response.
res = es.indices.create(index=INDEX_NAME, ignore=400, body=request_body)
print(" response: {}".format(res))
es搜索
# coding=utf-8
from __future__ import print_function
from elasticsearch import Elasticsearch
# NOTE(review): credentials are embedded in the host URL; prefer loading them
# from configuration or the environment instead of committing them.
ES_HOSTS = ['elastic:xxx@es-cn-xxx.elasticsearch.aliyuncs.com:9200']
INDEX_NAME = 'goodlook'
DOC_TYPE = 'post'

es = Elasticsearch(hosts=ES_HOSTS)

# Free-text query, matched against several post attributes at once.
q = "女士 春秋 连衣裙"

search_body = {
    "query": {
        # dis_max scores a hit by its best-matching clause, which is also how
        # a default (best_fields) multi_match combines its per-field scores.
        "dis_max": {
            "queries": [
                {
                    "multi_match": {
                        "query": q,
                        "fields": ["gender^50", "color^100", "age", "star", "scene"],
                    }
                },
                {
                    # "items" is mapped as nested, so items.category is only
                    # reachable through a nested query; listing it in the
                    # root-level multi_match would never match anything.
                    "nested": {
                        "path": "items",
                        # Do not fail on an index that has no "items" mapping.
                        "ignore_unmapped": True,
                        "query": {
                            "match": {
                                "items.category": {"query": q, "boost": 100}
                            }
                        },
                    }
                },
            ]
        }
    }
}

# Use the constants declared above rather than repeating the literals.
resp = es.search(
    index=INDEX_NAME,
    doc_type=DOC_TYPE,
    size=10,
    body=search_body,
)
print("response: {}".format(resp))
# /usr/bin/python3.6 /root/backend/aaa_xwk/es搜索.py
es类实现
#coding:utf8
import os
import time
from os import walk
from datetime import datetime
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
# es版本6.0
class MyElasticSearch(object):
    """Small convenience wrapper around one Elasticsearch index/type pair.

    Written against Elasticsearch 6.x, where documents still carry a mapping
    type (``doc_type``). Every method prints the raw response and also
    returns it.
    """

    def __init__(self, index_name, index_type, ip="127.0.0.1", port=9200):
        """Store the target index/type and build the client.

        :param index_name: name of the index all methods operate on
        :param index_type: mapping type (``doc_type``) of the documents
        :param ip: Elasticsearch host
        :param port: Elasticsearch HTTP port
        """
        self.index_name = index_name
        self.index_type = index_type
        self.ip = ip
        self.port = port
        # Unauthenticated client. For a secured cluster pass
        # http_auth=(user, password) and read the credentials from
        # configuration; never keep them in source control.
        # NOTE(review): elasticsearch-py appears to take timeout in seconds,
        # which makes 50000 roughly 14 hours - confirm this is intended.
        self.es = Elasticsearch([ip], port=self.port, timeout=50000)

    def create_index(self):
        """Create the index with its mapping unless it already exists.

        Since Elasticsearch 6.0 the ``string`` field type is gone and
        ``index`` only accepts a boolean (``not_analyzed`` was removed).

        :return: the create-index response (an error body on HTTP 400), or
            ``None`` when the index already exists
        """
        if self.es.indices.exists(index=self.index_name):
            return None

        _index_mappings = {
            "mappings": {
                self.index_type: {
                    "properties": {
                        "title": {
                            "type": "text",
                            "index": True,
                            "analyzer": "ik_max_word",
                            "search_analyzer": "ik_smart",
                        },
                        "date": {
                            "type": "text",
                            "index": True,
                        },
                        "keyword": {
                            "type": "text",
                            "index": True,
                        },
                        "source": {
                            "type": "text",
                            "index": False,
                        },
                        "link": {
                            "type": "text",
                            "index": False,
                        },
                    }
                }
            }
        }
        # ignore=400 returns the error body instead of raising, so a mapping
        # problem (e.g. a missing analyzer) shows up in the printed response.
        es_index = self.es.indices.create(
            index=self.index_name, ignore=400, body=_index_mappings)
        print("ES_index: ", es_index)
        return es_index

    def insert_data(self):
        """Store two sample documents one by one with ``create``.

        The documents get the ids 10 and 11. ``create`` (unlike ``index``)
        refuses to overwrite a document whose id already exists.

        :return: list with the response of each ``create`` call
        """
        insert_list = [
            {
                "date": "2017-09-13",
                "source": "慧聪网",
                "link": "http://info.broadcast.hc360.com/2017/09/130859749974.shtml",
                "keyword": "电视",
                "title": "001",
            },
            {
                "date": "2017-09-13",
                "source": "中国文明网",
                "link": "http://www.wenming.cn/xj_pd/yw/201709/t20170913_4421323.shtml",
                "keyword": "电视",
                "title": "002",
            },
        ]
        responses = []
        # enumerate yields the position directly; list.index() would rescan
        # the list for every item and return the wrong id for equal items.
        for doc_id, item in enumerate(insert_list, 10):
            res = self.es.create(
                index=self.index_name, doc_type=self.index_type,
                id=doc_id, body=item)
            print(res)
            responses.append(res)
        return responses

    def insert_data_by_bulk(self):
        """Store four sample documents (ids 1-4) with one bulk request.

        :return: number of successfully executed actions
        """
        bulk_list = [
            {
                "date": "2017-09-13",
                "source": "慧聪网",
                "link": "http://info.broadcast.hc360.com/2017/09/130859749974.shtml",
                "keyword": "电视",
                "title": "付费 电视 行业面临的转型和挑战",
            },
            {
                "date": "2017-09-13",
                "source": "中国文明网",
                "link": "http://www.wenming.cn/xj_pd/yw/201709/t20170913_4421323.shtml",
                "keyword": "电视",
                "title": "电视 专题片《巡视利剑》广获好评:铁腕反腐凝聚党心民心",
            },
            {
                "date": "2017-09-13",
                "source": "人民电视",
                "link": "http://tv.people.com.cn/BIG5/n1/2017/0913/c67816-29533981.html",
                "keyword": "电视",
                "title": "中国第21批赴刚果(金)维和部隊启程--人民 电视 --人民网",
            },
            {
                "date": "2017-09-13",
                "source": "站长之家",
                "link": "http://www.chinaz.com/news/2017/0913/804263.shtml",
                "keyword": "电视",
                "title": "电视 盒子 哪个牌子好? 吐血奉献三大选购秘笈",
            },
        ]
        actions = [
            {
                "_index": self.index_name,
                "_type": self.index_type,
                "_id": doc_id,
                "_source": {
                    "date": line['date'],
                    "source": line['source'],
                    "link": line['link'],
                    "keyword": line['keyword'],
                    "title": line['title'],
                },
            }
            for doc_id, line in enumerate(bulk_list, 1)
        ]
        # Send all actions in one batch.
        success, _ = bulk(self.es, actions, index=self.index_name, raise_on_error=True)
        print('Performed %d actions' % success)
        return success

    def get_data_by_id(self, id):
        """Fetch one document with ``es.get()``.

        The document body sits directly under ``_source``; there is no outer
        ``['hits']['hits']`` wrapper as in a search response.

        :param id: id of the document to fetch
        :return: tuple ``(full response, response['_source'])``
        """
        res = self.es.get(index=self.index_name, doc_type=self.index_type, id=id)
        print(res)
        print(res['_source'])
        return res, res['_source']

    def get_data_by_query(self):
        """Search for documents whose ``keyword`` matches "电视".

        Uses ``es.search()``; each hit is found under ``['hits']['hits']``
        with its body in ``_source``.

        :return: tuple ``(full response, response['hits']['hits'])``
        """
        # To fetch everything use: {'query': {'match_all': {}}}
        doc = {
            "query": {
                "match": {
                    "keyword": "电视"
                }
            }
        }
        _searched = self.es.search(index=self.index_name, doc_type=self.index_type, body=doc)
        hits = _searched['hits']['hits']
        for hit in hits:
            src = hit['_source']
            print(src['date'], src['source'], src['link'], src['keyword'], src['title'])
        return _searched, hits

    def update_data(self, id):
        """Partially update one document, setting its ``title`` to "003".

        :param id: id of the document to update
        :return: the update response
        """
        # Target this instance's index/type rather than hard-coded names.
        res = self.es.update(
            index=self.index_name, doc_type=self.index_type, id=id,
            body={'doc': {'title': "003"}})
        print(res)
        return res

    def update_data_by_bulk(self):
        """Set ``keyword`` to "网络" on the documents 0, 10 and 11 in bulk.

        A fully spelled-out bulk update action looks like::

            {'_op_type': 'update', '_index': 'goodlook', '_type': 'post',
             '_id': 42, 'doc': {'question': '...'}}

        Here ``_index``/``_type`` come from the ``bulk()`` defaults instead.

        :return: number of successfully executed actions
        """
        # NOTE(review): with raise_on_error=True an id that does not exist is
        # presumably reported as a failed action and makes bulk() raise;
        # confirm that ids 0, 10 and 11 exist before calling.
        actions = [
            {'_op_type': 'update', '_id': doc_id, 'doc': {'keyword': '网络'}}
            for doc_id in (0, 10, 11)
        ]
        success, _ = bulk(self.es, actions, index=self.index_name, doc_type=self.index_type, raise_on_error=True)
        print('Performed %d actions' % success)
        return success

    def delete_one_data(self, id):
        """Delete a single document from the index.

        :param id: id of the document to delete
        :return: the delete response
        """
        res = self.es.delete(index=self.index_name, doc_type=self.index_type, id=id)
        print(res)
        return res

    def delete_all(self):
        """Delete the documents with ids 1 to 5 with one bulk request.

        Despite its name this neither drops the index nor removes every
        document; use :meth:`delete_index` to drop the index itself.

        A fully spelled-out bulk delete action looks like::

            {'_op_type': 'delete', '_index': 'goodlook', '_type': 'post',
             '_id': 42}

        :return: number of successfully executed actions
        """
        # NOTE(review): with raise_on_error=True an id that does not exist is
        # presumably reported as a failed action and makes bulk() raise;
        # confirm that all five ids exist before calling.
        actions = [{'_op_type': 'delete', '_id': doc_id} for doc_id in range(1, 6)]
        success, _ = bulk(self.es, actions, index=self.index_name, doc_type=self.index_type, raise_on_error=True)
        print('Performed %d actions' % success)
        return success

    def delete_index(self):
        """Drop the whole index.

        :return: the delete-index response
        """
        return self.es.indices.delete(index=self.index_name)
# Driver: exercise MyElasticSearch against a local Elasticsearch node.
es_store = MyElasticSearch("goodlook", "post", "localhost", 9200)
# Create the index once, then comment this call out; the index must not be
# re-created repeatedly.
res = es_store.create_index()
print(res)
# Output recorded from a run against the local node, where the ik_smart
# analyzer was not found:
# ES_index: {'error': {'root_cause': [{'type': 'mapper_parsing_exception', 'reason': 'analyzer [ik_smart] not found for field [title]'}], 'type': 'mapper_parsing_exception', 'reason': 'Failed to parse mapping [post]: analyzer [ik_smart] not found for field [title]', 'caused_by': {'type': 'mapper_parsing_exception', 'reason': 'analyzer [ik_smart] not found for field [title]'}}, 'status': 400}
# None
# Author's note: the error showed up locally because it had been disabled
# there (presumably the IK analysis plugin - TODO confirm).
# Delete the index
# es_store.delete_index()
# Insert documents one at a time
# insert_data = es_store.insert_data()
# print(insert_data)
# Insert documents in bulk
# insert_data = es_store.insert_data_by_bulk()
# print(insert_data)
# Fetch one document by id
# data, data01 = es_store.get_data_by_id(4)
# print(data01)
# {'date': '2017-09-13',
# 'source': '站长之家',
# 'link': 'http://www.chinaz.com/news/2017/0913/804263.shtml',
# 'keyword': '电视',
# 'title': '电视 盒子 哪个牌子好? 吐血奉献三大选购秘笈'
# }
# Query documents (match on keyword)
# data, data1 = es_store.get_data_by_query()
# Update one document
# data = es_store.update_data(11)
# print(data)
# Update documents in bulk
# data = es_store.update_data_by_bulk()
# print(data)
# Delete one document
# data = es_store.delete_one_data(11)
# print(data)
# Delete documents in bulk
# data = es_store.delete_all()
# print(data)
# Keep three structures clear in your mind: the index mapping, the document
# layout, and the shape of the returned responses.