django中使用elasticsearch-dsl.py

最新推荐文章于 2024-05-15 10:00:08 发布

weixin_46463851

最新推荐文章于 2024-05-15 10:00:08 发布

阅读量497

点赞数 9

文章标签： django elasticsearch python

本文链接：https://blog.csdn.net/weixin_46463851/article/details/134662826

版权

elasticsearch-dsl.py

背景：使用django、django-ninja、es进行开发。项目用到高亮检索、按照权重进行评分排序，在此做个记录。
1.安装
客户端版本需与 Elasticsearch 服务端的主版本相匹配：ES 7 使用 7.x 版本的客户端，ES 8 使用 8.x 版本的客户端。

pip install elasticsearch==7.17.6
pip install elasticsearch-dsl==7.4.1

2.编写document.py
使用的是django项目，在app下新建document.py文件。

from elasticsearch_dsl import Keyword, Long, Document, Q, Index, Text
from django.conf import settings  # pylint: disable=unused-import
from ***.db.elasticsearch import elasticsearch # 由于公司封装了elasticsearch ，所以这里调用公司的elasticsearch 引擎，与默认的基本一致
from website.apps.myapp.models import Goods


class GoodsDocument(Document):
    """Elasticsearch document describing ``Goods`` records.

    Declares the exact-match fields and index settings, builds the search
    query (keyword match, type filter, weighted scoring, highlighting) and
    routes every Elasticsearch call through the project-level
    ``elasticsearch`` connection imported at the top of the file.
    """
    # Fields that need exact (keyword) matching.
    good_id = Keyword()
    good_type = Keyword()

    class Index:
        """Index-level configuration."""
        # Name of the Elasticsearch index, configured in the Django settings.
        # This line must stay above the ``settings`` assignment below, which
        # shadows the imported Django ``settings`` inside this class body.
        name = settings.PROJECT_ES_GOOD_INDEX
        # See Elasticsearch Indices API reference for available settings
        settings = {'number_of_shards': 1,
                    'number_of_replicas': 0}

    class Django:
        """Model binding and the model fields synchronised to the index."""
        model = Goods  # The model associated with this Document
        # Fields synchronised to Elasticsearch and used for fuzzy search.
        fields = ['good_name', 'full_name', 'description']

    @staticmethod
    def build_query_dsl(search, filters, sort):
        """Build the Elasticsearch query for a goods search.

        :param search: Search object to extend
        :param filters: object exposing ``query`` (search keyword) and
            ``good_type`` (comma-separated type values)
        :param sort: sort selector; only the empty string triggers the
            ``good_id`` ordering
        :return: the extended Search object (not executed here)
        """
        keyword = filters.query
        query_fields = Goods.get_query_fields()  # pylint:disable=E1101

        if not keyword:
            query_dsl = Q('match_all')
        else:
            query_dsl = Q('bool',
                          should=[
                              Q('multi_match',
                                query=keyword,
                                fields=query_fields,
                                )
                          ],
                          minimum_should_match=1
                          )

        search = search.query(query_dsl).extra(track_total_hits=True)

        # NOTE(review): the view in this file defaults ``sort`` to 'default',
        # for which this branch is skipped -- confirm that is intended.
        if sort == '':
            search = search.sort('good_id')

        if filters.good_type:
            # The front end sends multiple values separated by commas;
            # split() already yields a one-element list for a single value.
            search = search.filter('terms', good_type=filters.good_type.split(','))

        # Highlighting and weighted scoring only apply to keyword searches;
        # every other case is an exact match.
        if keyword:
            # Custom scoring script: the weights below can be tuned freely.
            # Assumes both fields have a ``.keyword`` sub-field in the index
            # mapping -- TODO confirm.
            score_function = {
                'script_score': {
                    'script': {
                        'lang': 'painless',
                        'source': '''
                            double score = 0;
                            if (doc['good_name.keyword'].size() > 0) {
                                score = 4;
                            } else if (doc['full_name.keyword'].size() > 0) {
                                score = 3;
                            } 
                            return score;
                        '''
                    }
                }
            }
            # NOTE(review): this is the second ``query`` call on the same
            # Search, so the function_score query is combined with the query
            # added above rather than replacing it -- confirm the resulting
            # scores are the intended ones.
            search = search.query('function_score', query=query_dsl, functions=score_function, score_mode='sum',
                                  boost_mode='replace')
            highlight_fields = set(query_fields)
            search = search.highlight(*highlight_fields).highlight_options(number_of_fragments=0)
            search = search.sort('_score')
        return search

    # The project wraps the Elasticsearch client, so the stock Document
    # helpers are overridden below to always use that connection. They can be
    # moved into a shared base class if several documents need them.
    @classmethod
    def search(cls, using=None, index=None):
        """Return a Search bound to the project connection.

        ``using`` is accepted for signature compatibility but ignored.
        """
        return super().search(using=elasticsearch, index=index)

    @classmethod
    def init(cls, index=None, using=None):
        """Create the index and mappings through the project connection.

        ``using`` is accepted for signature compatibility but ignored.
        """
        return super().init(index=index, using=elasticsearch)

    def save(self, using=None, index=None, validate=True, skip_empty=True, return_doc_meta=False, **kwargs):
        """Save the document through the project connection.

        The document id is taken from ``good_id``. ``using`` is accepted for
        signature compatibility but ignored; the remaining arguments are
        forwarded to the parent implementation.
        """
        self.meta.id = self.good_id
        return super().save(using=elasticsearch, index=index, validate=validate,
                            skip_empty=skip_empty, return_doc_meta=return_doc_meta, **kwargs)

    @classmethod
    def delete_index(cls):
        """Delete this document's index if it exists."""
        # ``Index`` here is the elasticsearch_dsl class imported at module
        # level; the nested ``Index`` config class is not visible in methods.
        index = Index(cls._index._name, using=elasticsearch)  # pylint:disable=W0212

        if index.exists():
            index.delete()
        else:
            print('Index does not exist.')

3.view视图

@route.get('/get_goods_data',
           tags=['v0.1'],
           response=list[GoodsModelSchema],
           summary='获取 Gene search 页面的表格数据')
@paginate(CustomPagination)
def get_goods_data(_, filters: SearchFilterSchema = Query(...), sort: str = 'default'):
    """Search endpoint for goods.

    Cleans the incoming filters, builds the Elasticsearch query from them
    and returns the resulting Search object without executing it here
    (presumably the pagination decorator slices and executes it -- confirm).
    """
    filters.clean()
    base_search = GoodsDocument.search()
    return GoodsDocument.build_query_dsl(base_search, filters, sort)

4.models.py

from utils.decorators.query_fields_decorator import query_fields_decorator # 自封装装饰器，获取所有字段


@query_fields_decorator
class Goods(models.Model):
    """Goods model; the decorator above is applied to expose its query fields."""
    # Identifier of the goods record.
    good_id= models.CharField(max_length=50)
    # Name of the goods record.
    good_name = models.CharField(max_length=255)
    # Full name of the goods record.
    full_name = models.CharField(max_length=50)
    # Free-text description.
    description = models.TextField()

5.编写query_fields_decorator.py


def query_fields_decorator(model):
    """Class decorator that attaches ``get_query_fields`` to a model.

    :param model: class exposing ``_meta.get_fields()``
    :return: the same class, with ``get_query_fields`` added
    """
    def get_query_fields():
        """Return the names of the model's CharField and TextField fields."""
        return [
            field.name
            for field in model._meta.get_fields()
            if field.__class__.__name__ in ('CharField', 'TextField')
        ]

    # Wrapped in staticmethod so the helper can be called on instances as
    # well as on the class; a plain function would receive the instance as
    # an unexpected positional argument.
    model.get_query_fields = staticmethod(get_query_fields)
    return model

6.django-ninja的统一分页
新建api.py文件

"""ninja 的api入口"""
import re
import json
from typing import Any, Mapping, Type
from django.http import HttpRequest
from django.conf import settings
from ninja import NinjaAPI
from ninja import Schema, Field
from ninja.renderers import BaseRenderer
from ninja.responses import NinjaJSONEncoder
from ninja.pagination import PaginationBase


class CustomRenderer(BaseRenderer):
    """JSON renderer that wraps every response payload under a 'data' key."""
    media_type = 'application/json'
    encoder_class: Type[json.JSONEncoder] = NinjaJSONEncoder
    json_dumps_params: Mapping[str, Any] = {}

    def render(self, request: HttpRequest, data: Any, *, response_status: int) -> Any:
        """Serialise ``data`` to JSON inside a ``{'data': ...}`` envelope."""
        envelope = {'data': data}
        return json.dumps(envelope, cls=self.encoder_class, **self.json_dumps_params)


class HandleHighlight:
    """Rewrite highlight markup in a search response and merge it into the hits."""

    def __init__(self, response):
        self.response = response

    @staticmethod
    def sub_em(value: list[str]):
        """Turn ``<em>x</em>`` into ``<span class=high-light>x</span>``.

        Every fragment in ``value`` is rewritten and the results are joined
        with commas into a single string.
        """
        rewritten = [
            re.sub(r'<em>(.*?)<\/em>', r'<span class=high-light>\1</span>', fragment)
            for fragment in value
        ]
        # Joining several fragments with a comma may be lossy for fields
        # that produce more than one highlight fragment.
        return ','.join(rewritten)

    @property
    def value(self):
        """Return each hit's source with highlighted fields substituted in."""
        documents = []
        for hit in self.response.hits.hits:
            document = hit._source
            highlighted = getattr(hit, 'highlight', None)
            # Only hits carrying a highlight section are rewritten.
            if highlighted:
                for field_name, fragments in highlighted.to_dict().items():
                    document[field_name] = self.sub_em(fragments)
            documents.append(document)
        return documents


class CustomPagination(PaginationBase):
    """Paginator that returns ``page``/``per_page``/``total``/``items``.

    Handles both sliceable querysets and Elasticsearch Search objects; the
    latter are executed here so highlight markup can be post-processed.
    """

    class Input(Schema):
        """Pagination parameters read from the URL query string."""
        page: int = Field(1, ge=1, required=False)
        per_page: int = Field(10, ge=1, required=False)

    class Output(Schema):
        """Shape of the paginated response."""
        page: int
        per_page: int
        total: int
        items: list[Any]  # `items` is a default attribute

    def paginate_queryset(self, queryset, pagination: Input, **params):
        """Slice ``queryset`` to the requested page and build the response.

        :param queryset: object supporting ``count()`` and slicing; when the
            slice exposes ``execute`` it is treated as an Elasticsearch search
        :param pagination: validated ``Input`` carrying ``page``/``per_page``
        :return: dict with ``items``, ``page``, ``per_page`` and ``total``
        """
        page = pagination.page
        per_page = pagination.per_page
        total = queryset.count()
        offset = (page - 1) * per_page
        search = queryset[offset: offset + per_page]

        if getattr(search, 'execute', None):
            # Elasticsearch path: run the search, substitute highlight
            # markup, then convert every hit to a plain dict. A new list is
            # built instead of mutating the one being iterated, so equal
            # hits are each converted exactly once.
            hits = HandleHighlight(search.execute()).value
            items = [hit.to_dict() for hit in hits]
        else:
            # Database path: the sliced queryset is returned as is.
            items = search

        return {
            'items': items,
            'page': page,
            'per_page': per_page,
            'total': total,
        }


# Project-wide NinjaAPI instance; responses are rendered by CustomRenderer.
ninja_api = NinjaAPI(
    title=f'{settings.PROJECT_CODE} 文档中心',
    renderer=CustomRenderer(),
    version='0.1.0',
    description='goods',
    docs_url='/docs' if settings.DEBUG else None,  # docs URL is only set when DEBUG is on
    openapi_url='/openapi.json' if settings.DEBUG else None,  # OpenAPI schema URL is only set when DEBUG is on
    openapi_extra={
        'tags': [
            {
                'name': 'v0.1',
                'description': '商品',
                'externalDocs': {
                    'description': '需求文档',
                    'url': '***'
                }
            }]
    },
)

weixin_46463851

关注

9
点赞
踩
9

收藏

觉得还不错? 一键收藏
0
评论
django中使用elasticsearch-dsl.py

背景：使用django、django-ninja、es进行开发。项目用到高亮检索、按照权重进行评分排序，在此做个记录。使用的是django项目，在app下新建document.py文件。版本需相匹配，es7则使用7.*.*版本，es8则使用版本8。新建api.py文件。
复制链接

扫一扫