elasticsearch-dsl.py
背景:使用django、django-ninja、es进行开发。项目用到高亮检索、按照权重进行评分排序,在此做个记录。
1.安装
客户端版本需与ES服务端的主版本相匹配:es7使用7.*.*版本,es8使用8.*.*版本。
pip install elasticsearch==7.17.6
pip install elasticsearch-dsl==7.4.1
2.编写document.py
使用的是django项目,在app下新建document.py文件。
from elasticsearch_dsl import Keyword, Long, Document, Q, Index, Text
from django.conf import settings # pylint: disable=unused-import
from ***.db.elasticsearch import elasticsearch # 由于公司封装了elasticsearch ,所以这里调用公司的elasticsearch 引擎,与默认的基本一致
from website.apps.myapp.models import Goods
class GoodsDocument(Document):
    """Elasticsearch document for goods, plus helpers to build search requests.

    The ``search``/``init``/``save`` overrides exist because the project wraps
    its Elasticsearch connection: every call is routed through the
    ``elasticsearch`` object imported at the top of this file instead of the
    library's default connection.
    """

    # Fields that require exact (keyword) matching.
    good_id = Keyword()
    good_type = Keyword()

    class Index:
        """Index-level configuration for this document."""

        # Name of the Elasticsearch index, read from the Django settings module.
        # NOTE: this line must stay above the ``settings`` dict below; once that
        # class attribute is assigned, the name ``settings`` inside this class
        # body no longer resolves to the Django settings object.
        name = settings.PROJECT_ES_GOOD_INDEX
        # See the Elasticsearch Indices API reference for available settings.
        settings = {'number_of_shards': 1,
                    'number_of_replicas': 0}

    class Django:
        """Model binding for this document.

        NOTE(review): an inner ``Django`` class is the django-elasticsearch-dsl
        convention; confirm the ``Document`` base class imported in this file
        actually reads it.
        """

        model = Goods  # The model associated with this Document
        # Fields synchronised to Elasticsearch and used for fuzzy search.
        fields = ['good_name', 'full_name', 'description']

    @staticmethod
    def build_query_dsl(search, filters, sort):
        """Build the Elasticsearch query for a goods search.

        :param search: Elasticsearch ``Search`` object to extend.
        :param filters: object exposing ``query`` (keyword text) and
            ``good_type`` (comma-separated type list) attributes.
        :param sort: sort selector; an empty string sorts by ``good_id``.
        :return: the extended ``Search`` object.
        """
        if not filters.query:
            query_dsl = Q('match_all')
        else:
            query_dsl = Q(
                'bool',
                should=[
                    Q(
                        'multi_match',
                        query=filters.query,
                        fields=Goods.get_query_fields(),  # pylint:disable=E1101
                    ),
                ],
                minimum_should_match=1,
            )

        search = search.query(query_dsl).extra(track_total_hits=True)

        if sort == '':
            search = search.sort('good_id')

        if filters.good_type:
            # The front end sends multiple types separated by commas;
            # ``split`` already yields a one-element list when there is none.
            good_types = filters.good_type.split(',')
            search = search.filter('terms', good_type=good_types)

        # Highlighting and custom scoring only apply to keyword queries; every
        # other case is an exact match.
        if filters.query:
            highlight_fields = set(Goods.get_query_fields())  # pylint:disable=E1101
            # Custom scoring: 4 when the document has a ``good_name.keyword``
            # value, otherwise 3 when it has ``full_name.keyword``, otherwise 0.
            # The numbers are arbitrary weights and can be tuned.
            # NOTE(review): assumes both text fields are mapped with a
            # ``.keyword`` sub-field - confirm against the index mapping.
            score_function = {
                'script_score': {
                    'script': {
                        'lang': 'painless',
                        'source': '''
                            double score = 0;
                            if (doc['good_name.keyword'].size() > 0) {
                                score = 4;
                            } else if (doc['full_name.keyword'].size() > 0) {
                                score = 3;
                            }
                            return score;
                        '''
                    }
                }
            }
            # ``functions`` must be a list of score functions; passing the bare
            # dict makes the library iterate over its keys and lose the script.
            # NOTE(review): this is added on top of the query already applied
            # above, so ``query_dsl`` ends up in the request twice - confirm
            # that is intended.
            search = search.query(
                'function_score',
                query=query_dsl,
                functions=[score_function],
                score_mode='sum',
                boost_mode='replace',
            )
            search = search.highlight(*highlight_fields).highlight_options(number_of_fragments=0)
            search = search.sort('_score')
        return search

    # Because the Elasticsearch connection is wrapped, the stock Document
    # helpers do not work as-is and are overridden below. They could be moved
    # to a shared base class in a separate file if needed.

    @classmethod
    def search(cls, using=None, index=None):
        """Return a ``Search`` bound to the project's Elasticsearch connection.

        :param using: accepted for signature compatibility; the project
            connection is always used.
        :param index: optional index override, forwarded to the base class.
        """
        return super().search(using=elasticsearch, index=index)

    @classmethod
    def init(cls, index=None, using=None):
        """Create the index and mappings via the project's connection.

        :param index: optional index override, forwarded to the base class.
        :param using: accepted for signature compatibility; the project
            connection is always used.
        """
        return super().init(index=index, using=elasticsearch)

    def save(self, using=None, index=None, validate=True, skip_empty=True, return_doc_meta=False, **kwargs):
        """Save the document, using ``good_id`` as the Elasticsearch ``_id``.

        :param using: accepted for signature compatibility; the project
            connection is always used.
        :param index: optional index override, forwarded to the base class.
        :param validate: forwarded to the base class.
        :param skip_empty: forwarded to the base class.
        :param return_doc_meta: forwarded to the base class.
        :return: whatever the base ``save`` returns.
        """
        # NOTE(review): the original referenced ``ensembl_id``/``GeneDocument``
        # from another project; ``good_id`` is the matching key here - confirm.
        self.meta.id = self.good_id
        return super().save(
            using=elasticsearch,
            index=index,
            validate=validate,
            skip_empty=skip_empty,
            return_doc_meta=return_doc_meta,
            **kwargs,
        )

    @classmethod
    def delete_index(cls):
        """Delete this document's index if it exists; otherwise print a notice."""
        index = Index(cls._index._name, using=elasticsearch)  # pylint:disable=W0212
        if index.exists():
            index.delete()
        else:
            print('Index does not exist.')
3.view视图
@route.get(
    '/get_goods_data',
    tags=['v0.1'],
    response=list[GoodsModelSchema],
    summary='获取 Gene search 页面的表格数据',
)
@paginate(CustomPagination)
def get_goods_data(_, filters: SearchFilterSchema = Query(...), sort: str = 'default'):
    """Search endpoint for goods.

    Cleans the incoming filters, then returns the Elasticsearch search built
    by ``GoodsDocument.build_query_dsl``; the ``paginate`` decorator receives
    that search object as its queryset.

    :param _: the request (unused).
    :param filters: query-string filters.
    :param sort: sort selector, forwarded unchanged to ``build_query_dsl``.
    """
    filters.clean()
    base_search = GoodsDocument.search()
    return GoodsDocument.build_query_dsl(base_search, filters, sort)
4.models.py
from utils.decorators.query_fields_decorator import query_fields_decorator # 自封装装饰器,获取所有字段
@query_fields_decorator
class Goods(models.Model):
    """Goods model.

    Decorated with ``query_fields_decorator``, which attaches a
    ``get_query_fields`` helper to the class.
    """

    # Field order is kept as declared.
    good_id = models.CharField(max_length=50)
    good_name = models.CharField(max_length=255)
    full_name = models.CharField(max_length=50)
    description = models.TextField()
5.编写query_fields_decorator.py
def query_fields_decorator(model):
    """Class decorator that attaches ``get_query_fields`` to a model.

    :param model: a class exposing ``_meta.get_fields()``.
    :return: the same class, with ``get_query_fields`` added.
    """

    def get_query_fields():
        """Return the names of the model's ``CharField``/``TextField`` fields.

        Matching is by exact class name, so subclasses of those field types
        with a different class name are not included.
        """
        return [
            field.name
            for field in model._meta.get_fields()
            if field.__class__.__name__ in ('CharField', 'TextField')
        ]

    # Attach as a staticmethod so the helper also works when called on an
    # instance; a plain function would receive the instance as an unexpected
    # positional argument and raise TypeError.
    model.get_query_fields = staticmethod(get_query_fields)
    return model
6.django-ninja的统一分页
新建api.py文件
"""ninja 的api入口"""
import re
import json
from typing import Any, Mapping, Type
from django.http import HttpRequest
from django.conf import settings
from ninja import NinjaAPI
from ninja import Schema, Field
from ninja.renderers import BaseRenderer
from ninja.responses import NinjaJSONEncoder
from ninja.pagination import PaginationBase
class CustomRenderer(BaseRenderer):
    """JSON renderer that wraps every response payload in a ``'data'`` key."""

    media_type = 'application/json'
    encoder_class: Type[json.JSONEncoder] = NinjaJSONEncoder
    json_dumps_params: Mapping[str, Any] = {}

    def render(self, request: HttpRequest, data: Any, *, response_status: int) -> Any:
        """Serialise ``{'data': data}`` to a JSON string.

        ``request`` and ``response_status`` are part of the renderer
        signature but are not used here.
        """
        envelope = {'data': data}
        return json.dumps(envelope, cls=self.encoder_class, **self.json_dumps_params)
class HandleHighlight:
    """Rewrite Elasticsearch highlight markup inside a search response.

    Highlight fragments arrive wrapped in ``<em>`` tags; this helper converts
    them to ``<span class=high-light>`` and writes them back over the matching
    keys of each hit's source.
    """

    # Compiled once at class level instead of on every fragment.
    _EM_TAG = re.compile(r'<em>(.*?)</em>')
    _SPAN_TEMPLATE = r'<span class=high-light>\1</span>'

    def __init__(self, response):
        """Store the executed search response to post-process."""
        self.response = response

    @staticmethod
    def sub_em(value: list[str]):
        """Replace ``<em>x</em>`` with ``<span class=high-light>x</span>``.

        :param value: highlight fragments for a single field.
        :return: the rewritten fragments joined with ``','``.
        """
        # NOTE: joining with ',' may be ambiguous when a field yields several
        # fragments (caveat carried over from the original author).
        return ','.join(
            HandleHighlight._EM_TAG.sub(HandleHighlight._SPAN_TEMPLATE, fragment)
            for fragment in value
        )

    @property
    def value(self):
        """Return each hit's source with highlighted fields substituted in.

        Sources are modified in place: for every key present in a hit's
        highlight, the source value is overwritten with the rewritten markup.
        """
        result = []
        for hit in self.response.hits.hits:
            source = hit._source
            # Only hits that carry a highlight section need rewriting.
            if getattr(hit, 'highlight', None):
                for key, fragments in hit.highlight.to_dict().items():
                    source[key] = self.sub_em(fragments)
            result.append(source)
        return result
class CustomPagination(PaginationBase):
    """Paginator that follows the team's response convention.

    Handles both Elasticsearch searches (anything exposing ``execute``) and
    ordinary sliceable querysets.
    """

    class Input(Schema):
        """Pagination parameters read from the URL query string."""

        page: int = Field(1, ge=1, required=False)
        per_page: int = Field(10, ge=1, required=False)

    class Output(Schema):
        """Shape of the paginated response."""

        page: int
        per_page: int
        total: int
        items: list[Any]  # `items` is a default attribute

    def paginate_queryset(self, queryset, pagination: Input, **params):
        """Return one page of ``queryset`` together with paging metadata.

        :param queryset: an Elasticsearch search or a database queryset; it
            must support ``count()`` and slicing.
        :param pagination: validated ``Input`` carrying ``page``/``per_page``.
        :param params: extra keyword arguments from the framework (unused).
        :return: dict with ``items``, ``page``, ``per_page`` and ``total``.
        """
        page = pagination.page
        per_page = pagination.per_page
        total = queryset.count()
        offset = (page - 1) * per_page
        search = queryset[offset: offset + per_page]
        if getattr(search, 'execute', None):
            # Elasticsearch path: apply highlight substitution, then convert
            # every hit source to a plain dict. A comprehension is used rather
            # than ``items[items.index(item)] = ...``, which is quadratic and
            # converts the wrong element when two hits compare equal.
            highlighted = HandleHighlight(search.execute()).value
            items = [item.to_dict() for item in highlighted]
        else:
            # Database path: the sliced queryset is returned as-is.
            items = search
        return {
            'items': items,
            'page': page,
            'per_page': per_page,
            'total': total,
        }
# Project-wide NinjaAPI instance; responses are rendered through CustomRenderer.
ninja_api = NinjaAPI(
    title=f'{settings.PROJECT_CODE} 文档中心',
    renderer=CustomRenderer(),
    version='0.1.0',
    description='goods',
    docs_url='/docs' if settings.DEBUG else None,  # interactive docs are only served when DEBUG is on
    openapi_url='/openapi.json' if settings.DEBUG else None,  # the OpenAPI schema is only served when DEBUG is on
    openapi_extra={
        'tags': [
            {
                'name': 'v0.1',
                'description': '商品',
                'externalDocs': {
                    'description': '需求文档',
                    'url': '***'
                }
            }]
    },
)