Druid的设计比较适合用来查大批量数据(几亿条,T级)的uv,只能查uv数,而不能查具体每个用户id(Druid里面会做压缩,比如970G的数据,存入Druid后只剩下45.8G,大约21倍的压缩比)
一、Druid查询某一标签,各标签值的uv
{
"queryType":"groupBy",
"dataSource":"XXXX_enum_metric",
"granularity":"DAY",
"filter":{
"type":"and",
"fields":[
{
"type":"selector",
"dimension":"tag_name",
"value": "ajk_xf_loupan"
},
{
"type":"in",
"dimension":"tag_value",
"values": ["标签值1","标签值2","标签值3","标签值4","标签值5"]
}
]
},
"dimensions":[
"tag_name",
"tag_value"
],
"aggregations":[
{
"name":"log_total",
"type":"longSum",
"fieldName":"count"
},
{
"type":"hyperUnique",
"name":"ret_hyperUnique_uv",
"fieldName":"uuid_unique"
}
],
"intervals" : ["2020-05-10T00:00:00.000+08:00/2020-05-11T00:00:00.000+08:00"]
}
对应的hiveSql语句
`tag_name` string COMMENT '标签名',
`tag_value` string COMMENT '标签值',
`id_str` string COMMENT '用户id',
`dt_time` string COMMENT '日期yyyy-MM-dd 00:00:00')
select tag_name, tag_value, count(id_str) from (
select tag_name, tag_value, id_str from
XXXX.XXXX_druid_enum_input
where dt='20200317' and id_type='muid' and portrait_proj='ajk'
and tag_name='ajk_xf_loupan' and tag_value in ("标签值1","标签值2","标签值3","标签值4","标签值5")
group by tag_name, tag_value, id_str
) a
group by tag_name, tag_value;
二、Druid查询某一标签,所有标签值的合计uv
{
"queryType":"groupBy",
"dataSource":"XXXX_enum_metric",
"granularity":"DAY",
"filter":{
"type":"and",
"fields":[
{
"type":"selector",
"dimension":"tag_name",
"value": "ajk_xf_loupan"
},
{
"type":"in",
"dimension":"tag_value",
"values": ["标签值1","标签值2","标签值3","标签值4","标签值5"]
}
]
},
"dimensions":[
"tag_name"
],
"aggregations":[
{
"name":"log_total",
"type":"longSum",
"fieldName":"count"
},
{
"type":"hyperUnique",
"name":"ret_hyperUnique_uv",
"fieldName":"uuid_unique"
}
],
"intervals" : ["2020-05-10T00:00:00.000+08:00/2020-05-11T00:00:00.000+08:00"]
}
对应hiveSql语句
select count(1) from (
select tag_name, id_str from (
select tag_name, id_str from
XXXX.XXXX_druid_enum_input
where dt='20200317' and id_type='muid' and portrait_proj='ajk'
and tag_name='ajk_xf_loupan' and tag_value in ("标签值1","标签值2","标签值3","标签值4","标签值5")
) a
group by a.tag_name, a.id_str
) b
group by b.tag_name;