背景:使用elasticsearch-dsl时,每次写入数据的脚本都需要手动编写,耗费太多时间,故转用django-elasticsearch-dsl(详见官方文档)。
使用的elasticsearch版本为7.17,安装django-elasticsearch-dsl==7.4
pip install elasticsearch==7.17.6
pip install django-elasticsearch-dsl==7.4
1.settings.py
# Elasticsearch connection settings: host plus account credentials.
# This setting is required; without it the project cannot start.
ELASTICSEARCH_DSL = {
    'default': {
        'hosts': 'localhost:9200',
        'http_auth': ('username', 'password'),
    },
}
2.通用装饰器
def query_fields_decorator(model):
    """
    Class decorator that attaches a ``get_query_fields`` helper to ``model``.

    :param model: class exposing ``_meta.get_fields()``.
    :return: the same ``model`` class, with ``get_query_fields`` attached.
    """
    def get_query_fields():
        """
        Return the names of the model's ``CharField`` / ``TextField`` fields.
        """
        # Fields are matched by exact class name, so a subclass with a
        # different class name is not picked up.
        return [
            field.name
            for field in model._meta.get_fields()
            if field.__class__.__name__ in ('CharField', 'TextField')
        ]

    # staticmethod: otherwise calling the helper on an instance would pass
    # ``self`` to a zero-argument function and raise TypeError.
    model.get_query_fields = staticmethod(get_query_fields)
    return model
3.models.py
from django.db import models
from .query_fields_decorator import query_fields_decorator
@query_fields_decorator
class Goods(models.Model):
    """
    Goods model.
    """
    good_id = models.CharField(max_length=100, primary_key=True)
    title = models.CharField(max_length=255, default='')
    name = models.CharField(max_length=100)
    technology = models.CharField(max_length=100)
    good_img = models.FileField(upload_to='public/img/', default='', blank=True)
    update_date = models.CharField(max_length=100, blank=True)

    @property
    def good_img_url(self):
        """Return the URL of ``good_img``, or ``''`` when no file is set."""
        return self.good_img.url if self.good_img else ''

    @property
    def techs(self):
        """
        Return ``technology`` split on ``;`` as a list of trimmed values.

        The relational database stores the value as ``a ; b ; c ;``.
        To aggregate on it in Elasticsearch it has to be stored as an
        array: ``[a, b, c]``.
        This is a preprocessing step, called from document.py.
        """
        # Strip before filtering so that whitespace-only pieces (e.g. after a
        # trailing "; ") do not end up as empty strings in the result.
        stripped = (part.strip() for part in self.technology.split(';'))
        return [tech for tech in stripped if tech]
4.document.py
from elasticsearch_dsl import Q
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from .models import Goods
def base_query(model, search, filters):
    """
    Shared search helper: build the base query and apply it to ``search``.

    :param model: class exposing ``get_query_fields()``.
    :param search: search object providing ``query()`` and ``extra()``.
    :param filters: object with a ``query`` attribute (the search keywords).
    :return: tuple ``(search, highlight_fields, query_dsl)``.
    """
    # Looked up once and reused for both the multi_match and the highlight set.
    query_fields = model.get_query_fields()  # pylint:disable=E1101
    if filters.query:
        query_dsl = Q(
            'bool',
            should=[
                Q('multi_match', query=filters.query, fields=query_fields),
            ],
            minimum_should_match=1,
        )
    else:
        # No keywords supplied: match every document.
        query_dsl = Q('match_all')
    highlight_fields = set(query_fields)
    search = search.query(query_dsl).extra(track_total_hits=True)
    return search, highlight_fields, query_dsl
def apply_filter(search, field_name, field_value):
    """
    Shared sidebar helper: filter ``search`` on ``<field_name>.keyword``.

    :param search: search object providing ``filter()``.
    :param field_name: name of the field to filter on.
    :param field_value: a single value, or several values separated by
        commas; empty/None means "no filter".
    :return: the filtered search, or ``search`` unchanged when there is
        nothing to filter on.
    """
    if not field_value:
        return search
    # Normalise single and multi-value input the same way: trim each piece
    # and drop empty ones (e.g. from a trailing comma).
    values = [item.strip() for item in field_value.split(',')]
    values = [item for item in values if item]
    if not values:
        return search
    keyword_field = f'{field_name}.keyword'
    if len(values) == 1:
        return search.filter('term', **{keyword_field: values[0]})
    # Several comma-separated values from the front end become a terms filter.
    return search.filter('terms', **{keyword_field: values})
# @registry.register_document: links the model to Elasticsearch.
@registry.register_document
class GoodDocument(Document):
    """
    Elasticsearch document definition for the ``Goods`` model.
    """
    good_id = fields.KeywordField()
    good_img = fields.TextField(
        # Value is read from the model's ``good_img_url`` attribute.
        attr='good_img_url'
    )
    technology = fields.TextField(
        # ``techs`` is defined on the model.
        attr='techs',
        fields={
            'keyword': fields.KeywordField(ignore_above=256)
        }
    )
    # Sorting on a date stored as text requires a keyword sub-field.
    update_date = fields.TextField(
        analyzer='snowball',
        fields={
            'keyword': fields.KeywordField(ignore_above=256)
        }
    )

    class Index:
        # Name of the Elasticsearch index; each index plays the role of a
        # database table.
        # The real name is redacted in the original text: '***' is a
        # placeholder and must be replaced with an actual index name.
        name = '***'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0
        }

    class Django:
        model = Goods
        # Model fields other than those declared explicitly above
        # (good_id, technology, update_date, ...); adjust as needed.
        fields = [
            'title',
            'name'
        ]

    @staticmethod
    def build_query_dsl(search, filters, sort):
        """
        Build the Elasticsearch query DSL.

        :param search: ES search object
        :param filters: query filters; ``filters.query`` (keywords) and
            ``filters.technology`` are read here
        :param sort: ``''`` sorts by ``update_date.keyword``; any other value
            ranks by the custom score below
        :return: the search with query, filter, sort and (when keywords are
            given) highlighting applied
        """
        # Pass the model this document is bound to (the original referenced
        # an undefined name ``Dataset``).
        search, highlight_fields, query_dsl = base_query(Goods, search, filters)  # pylint:disable=E1101
        search = apply_filter(search, 'technology', filters.technology)
        if sort == '':
            search = search.sort('update_date.keyword')
        else:
            # Custom scoring function.
            # Weighting is title > name; the score values are arbitrary
            # numbers and can be changed as needed.
            # NOTE(review): ``title`` and ``name`` are declared through
            # ``Django.fields`` with no explicit ``keyword`` sub-field in this
            # class - confirm that ``title.keyword`` / ``name.keyword`` exist
            # in the index mapping.
            score_function = {
                'script_score': {
                    'script': {
                        'lang': 'painless',
                        'source': '''
                            double score = 0;
                            if (doc['title.keyword'].size() > 0) {
                                score = 3;
                            } else if (doc['name.keyword'].size() > 0) {
                                score = 2;
                            }
                            return score;
                        '''
                    }
                }
            }
            # ``functions`` is a list of scoring functions.
            search = search.query(
                'function_score',
                query=query_dsl,
                functions=[score_function],
                score_mode='sum',
                boost_mode='replace',
            )
            search = search.sort('_score')
        if filters.query:
            # Highlight settings.
            search = search.highlight(*highlight_fields).highlight_options(number_of_fragments=0)
        return search
5.views.py
这里使用的是django-ninja框架
def filter_buckets(buckets):
    """
    Convert bucket-aggregation results into a list of dicts.

    :param buckets: iterable of objects with ``key`` and ``doc_count``
        attributes.
    :return: list of ``{'key': ..., 'doc_count': ...}`` dicts, in input order.
    """
    return [
        {'key': bucket.key, 'doc_count': bucket.doc_count}
        for bucket in buckets
    ]
@route.get('/good/get_good_sidebar',
           tags=['v0.1'],
           response=DatasetSidebarOut,
           summary='侧边栏聚合查询')
def get_good_sidebar(_, filters: GoodIn = Query(...)):
    """
    Return the sidebar aggregation counts for goods.

    :param _: the request (unused).
    :param filters: query parameters, passed on to
        ``GoodDocument.build_query_dsl``.
    :return: dict mapping each sidebar field to a list of
        ``{'key': ..., 'doc_count': ...}`` buckets.
    """
    search = GoodDocument.search()
    search = GoodDocument.build_query_dsl(search, filters, '')
    # Add one bucket() call per aggregation, in the form below.
    # '***' is a redacted field name in the original text; replace it with a
    # real field.
    search.aggs.bucket('***_terms', 'terms', field='***.keyword')
    search.aggs.bucket('technology_terms', 'terms', field='technology.keyword')
    result = search.execute()['aggregations']
    return {
        # Item access, because the placeholder is not a valid attribute name.
        '***': filter_buckets(result['***_terms'].buckets),
        'technology': filter_buckets(result.technology_terms.buckets),
    }