subtractDays(time , time) 时间想减的函数
a,2017-02-05,200
a,2017-02-06,300
a,2017-02-07,200
a,2017-02-08,400
a,2017-02-10,600
b,2017-02-05,200
b,2017-02-06,300
b,2017-02-08,200
b,2017-02-09,400
b,2017-02-10,600
c,2017-01-31,200
c,2017-02-01,300
c,2017-02-02,200
c,2017-02-03,400
c,2017-02-10,600
a,2017-03-01,200
a,2017-03-02,300
a,2017-03-03,200
a,2017-03-04,400
a,2017-03-05,600
求连续销售超过三天且最大的记录
1 在CH中建表
create table
tb_shop(name String , ctime Date ,money Float64)
engine=MergeTree
primary key (name,ctime)
order by (name,ctime) ;
2 导入数据
cat shop.csv | clickhouse-client -q "insert into tb_shop FORMAT CSV"
select
name ,
count(1) days
from
(select
name ,
ct ,
idx ,
subtractDays(ct , idx) as diff 一个时间相减的函数
from
(
select
name ,
groupArray(ctime) arr , 把name相同的时间聚合到一起
arrayEnumerate(arr) arr_index 获得角标
from
tb_shop
group by name)t1 根据name分组
array join
arr as ct , 把这个数组炸裂开且开窗口
arr_index as idx 把这个数组炸裂开且开窗口
order by name)t2 根据name排个序
group by name , diff 根据name和时间相同的聚合一下
having days >3 筛选出连续大于3的
order by name , days desc 根据时间降序排序
limit 1 by name 因为排过序了所以第一个是最大的 取name相同的第一条数据
;
arrayMap是一个高阶函数
Array(T)
参数一 是一个函数
参数二 数组
select arrayMap(x->x*x ,[1,2,3,4]) ;
返回的是[1,4,6,8] 参数一的函数是可以改成其他函数的
splitByString 将字符串切割成数组
select splitByString(',' , 'hello,tom');
返回的是['hello' , 'tom']
json 函数
1 在CH中建表
create table tb_json(cont String) engine=Log ;
向表中添加数据
insert into tb_json values('{"movie":"1207","rate":"4","timeStamp":"978300719","uid":"1"}')
,('{"movie":"2028","rate":"5","timeStamp":"978301619","uid":"1"}')
,('{"movie":"531","rate":"4","timeStamp":"978302149","uid":"1"}')
,('{"movie":"3114","rate":"4","timeStamp":"978302174","uid":"1"}')
,('{"movie":"608","rate":"4","timeStamp":"978301398","uid":"1"}')
,('{"movie":"1246","rate":"4","timeStamp":"978302091","uid":"1"}')
,('{"movie":"1357","rate":"5","timeStamp":"978298709","uid":"2"}') ;
添加过数据的表长这个样子
┌─cont──────────────────────────────────────────────────────────┐
│ {"movie":"1207","rate":"4","timeStamp":"978300719","uid":"1"} │
│ {"movie":"2028","rate":"5","timeStamp":"978301619","uid":"1"} │
│ {"movie":"531","rate":"4","timeStamp":"978302149","uid":"1"} │
│ {"movie":"3114","rate":"4","timeStamp":"978302174","uid":"1"} │
│ {"movie":"608","rate":"4","timeStamp":"978301398","uid":"1"} │
│ {"movie":"1246","rate":"4","timeStamp":"978302091","uid":"1"} │
│ {"movie":"1357","rate":"5","timeStamp":"978298709","uid":"2"} │
└───────────────────────────────────────────────────────────────┘
visitParamHas(params, name) 参数一 jsonrowdata 参数二 属性是否有(1 , 0)
visitParamExtractString(params, name) 访问数的一个属性
SELECT visitParamExtractString(cont, 'movie')
FROM tb_json
┌─visitParamExtractString(cont, 'movie')─┐
│ 1207 │
│ 2028 │
│ 531 │
│ 3114 │
│ 608 │
│ 1246 │
│ 1357 │
└────────────────────────────────────────┘
SELECT JSONExtract(cont, 'Tuple(movie String , rate String , uid String)')
FROM tb_json
相当于分开查看json里面的内容
┌─JSONExtract(cont, 'Tuple(movie String , rate String , uid String)')─┐
│ ('1207','4','1') │
│ ('2028','5','1') │
│ ('531','4','1') │
│ ('3114','4','1') │
│ ('608','4','1') │
│ ('1246','4','1') │
│ ('1357','5','2') │
└─────────────────────────────────────────────────────────────────────┘
SELECT CAST(JSONExtract(cont, 'Tuple(movie String , rate String , uid String)').2, 'Float64') + 12
套了一层JSONExtract可以查看其中的一个元素并且可以转换数据类型和计算
FROM tb_json ;
SELECT JSONExtract(cont, 'Tuple(movie String , rate String , uid String)').3
FROM tb_json ;
简化写法查看里面的单个元素但不能转换数据类型和计算
select cast(tupleElement(JSONExtract(cont , 'Tuple(movie String , rate String , uid String)'),2) as Float64)+12 from tb_json;
┌─plus(CAST(tupleElement(JSONExtract(cont, 'Tuple(movie String , rate String , uid String)'), 2), 'Float64'), 12)─┐
│ 16 │
│ 17 │
│ 16 │
│ 16 │
│ 16 │
│ 16 │
│ 17 │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘