一 :使用过程中 记录以及筛选条件
二 :查询样列 基础查询 语法结构 sql 语句 以及 对应的 es 查询语法
不为空查询
SQL
SELECT * FROM orders where last_buy_time IS NOT NULL
ES SQL
$esParmas= [
"size"=>0,
"query"=>["bool"=>["filter"=>["exists"=>["field"=>"last_buy_time"]]]]
];
根据条件查询去重计数
SQL
select count(DISTINCT(member_id)) as all_total from member_analysis where member_type IN("consumer","potential") AND last_buy_time >= '1605801599' and buy_times > '2' and avg_amount > '18700' AND (dealer_name LIKE "%xunke%" OR dealer_name LIKE "%xunke%" ) AND (platform LIKE "%xuns%" OR platform LIKE "%xun%")
ESSQL
$esParmas=[
"size"=>0,
"query"=>[
"bool"=>[
"filter"=>[
["terms"=>["member_type"=>["consumer","potential"]]],
["range"=>["last_buy_time"=>["gte"=>1605801599]]],
["range"=>["buy_times"=>["gt"=>2]]],
["range"=>["avg_amount"=>["gt"=>18700]]],
[
"bool"=>["should"=>[
["wildcard"=>["dealer_name"=>"*xunke*"]],
["wildcard"=>["dealer_name"=>"*xunke*"]]
]
]
],
[
"bool"=>["should"=>[
["wildcard"=>["platform"=>"*xuns*"]],
["wildcard"=>["platform"=>"*xun*"]]
]
]
],
]
]
],
"aggs"=>["member_id_quchong"=>["cardinality"=>["field"=>"member_id"]]]
];
备注:其中 包含 cardinlity 是起到去重 计数作用, 和 "collapse"=>["field"=>"member_id"] 中 collapse 去重相比 cardinlity 返回查询去重计数和结果,而 collapse 返回查询去重的结果。
wildcard 是起到 sql 中 like 查询 效果 但是 他必须 前后加* 类似 like 的前后 加%, 而且 和 match match_phrase 区别。 match 查询分子 后 相邻 和 不相邻的字符串 只要保安 字符串的文档都会查出, match_phrase 和 wildcard 查询效果一样 只要包含相邻的 该字符串的文档都可以查询出来, 类似精准模糊查询。
根据条件分组查询计数
SQL
SELECT count(1),reg_source FROM member_analysis where member_type IN("consumer","potential") and reg_time >=1377808000 and reg_time<=1585670400
ESSQL
$esParmas= [
"size"=>0,
"query"=>[
"bool"=>[
"filter"=>[
["terms"=>["member_type"=>["consumer","potential"]]],
["range"=>["reg_time"=>["gte"=>1377808000,"lte"=>1585670400]]]
]
]
],
"aggs"=>[
"group_by_member_type"=>[
"terms"=>["field"=>"reg_source","size"=>1000]
]
]
];
根据条件多字段分组计数 (来源,年月分为分组计数)
SQL
SELECT count(1),reg_source FROM member_analysis where member_type IN("consumer","potential") and reg_time >=1377808000 and reg_time<=1585670400 GROUP BY reg_source,date_ym
ESSQL
$esParmas= [
"size"=>0,
"query"=>[
"bool"=>[
"filter"=>[
["terms"=>["member_type"=>["consumer","potential"]]],
["range"=>["reg_time"=>["gte"=>1377808000,"lte"=>1585670400]]]
]
]
],
"aggs"=>[
"group_by_reg_source"=>[
"terms"=>["script"=>"doc['reg_source.raw'].values +'#split#'+ doc['date_ym'].values","size"=>1000]
]
]
];
备注: 聚合中 script 用于连接某些字段 作为一组进行分组处理。 reg_source 如果是text 设置成 keyword 要 这里 reg_source 的keyword 别名 为 raw
根据条件分组求和
SQL
SELECT count(1),sum(payed) FROM member_analysis where member_type IN("consumer","potential") and reg_time >=1377808000 and reg_time<=1585670400 GROUP BY province_id
ESSQL
$esParmas= [
"size"=>0,
"query"=>[
"bool"=>[
"filter"=>[
["terms"=>["member_type"=>["consumer","potential"]]],
["range"=>["reg_time"=>["gte"=>1377808000,"lte"=>1585670400]]]
]
]
],
"aggs"=>[
"group_by_member_type"=>[
"terms"=>["field"=>"province_id","size"=>1000],
"aggs"=>["total_sum"=>["sum"=>"field"=>"payed"]]
]
]
];
根据条件分组求和只计算桶的个数 只返回桶的个数(返回满足 分组后每个分组大于2的桶的个数) 不返回桶的数据明细
①url 参数后要加 filter_path=aggregations.usercount
$url="http://****************:9200/orders/_search?filter_path=aggregations.usercount";
$esParmas= [
"size"=>0,
"query"=>[
"bool"=>[
"filter"=>[
["terms"=>["member_type"=>["consumer","potential"]]],
["range"=>["order_time"=>["gte"=>1377808000,"lte"=>1585670400]]]
]
]
],
"aggs"=>[
"usercount"=>["sum_bucket"=>["buckets_path"=>"usercount-bucket>usercount-metric"]],
"usercount-bucket"=>[
"terms"=>["field"=>"member_id","min_doc_count"=>2,"size"=>10000000],
"aggs"=>["usercount-metric"=>["max"=>["script"=>["lang"=>"painless","source"=>"return 1"]]]]
]
]
];
返回满足分组计数大于2的所有的桶 并计算每个桶内的个数
sql
POST usercount-bucket/_search
{
"size": 0,
"aggs": {
"usercount-bucket": {
"terms": {
"field": "province_id",
"min_doc_count": 2
},
"aggs": {
"count": {
"max": {
"script":{
"lang":"painless",
"source":"return 1"
}
}
}
}
},
"usercount": {
"sum_bucket": {
"buckets_path": "usercount-bucket>count"
}
}
}
}
结果
返回分组查询满足桶内大于2的个数的详细信息 包含桶内的数据
$esParmas= [
"size"=>0,
"query"=>[
"bool"=>[
"filter"=>[
["terms"=>["member_type"=>["consumer","potential"]]],
["range"=>["reg_time"=>["gte"=>1377808000,"lte"=>1585670400]]]
]
]
],
"aggs"=>[
"usercount"=>["sum_bucket"=>["buckets_path"=>"usercount-bucket>usercount-metric"]],
"usercount-bucket"=>[
"terms"=>["field"=>"province_id","size"=>100,"min_doc_count"=>2],
"aggs"=>["usercount-metric"=>["cardinality"=>["field"=>"province_id"]]]
]
]
];