类型转换函数
select cast("3" as int);将3转换为int类型
select cast("2012-02-02" as date);将符合格式的字符串转换为 date类型,只能转换中间为'-'格式的字符串
select cast(current_timestamp as date);当前时间
数学运算函数
select round(3.2); 四舍五入
select round(3.2215,3); 保留三位小数并且四舍五入
select ceil(5.2); 向上取整
select floor(5.6); 向下取整
select abs(-2); 取绝对值
select greatest(a,b,c,d,...); 取最大值
select least(); 取最小值
字符串函数
select substr(string str,int start); 从某个索引开始截取字符串
select substr(string str,int start,int len);
select concat(string A,string B...);拼接字符串
select concat_ws(string SEP,string A,string B...); 拼接结果为ASEPB
select length(string A); 字符串长度
select split(string str,string pat);切分字符串,返回数组
select upper(string str); 将字符串转为大写
select lower(string str);将字符串转为小写
时间函数
select current_timestamp; 返回当前系统时间 年月日时分秒
select current_date; 返回当前事件 返回值类型年月日
select unix_timestamp(); 时间戳 距离1970-01-01毫秒数
select from_unixtime(unix_timestamp(),'yyyy-MM-dd hh:mm:ss');时间戳转换字符串
select to_date('2017-09-17 16:58:32');将字符串转成日期
集合函数
select array(1,2,3,5,8); 构造一个数组
select array_contains(array(1,5,65),value);判断value是否在数组中,返回boolean值
select sort_array(array(....));返回排序后的数组
select size(array(..));返回一个集合的长度
分组函数
sum
avg
max
min
count
collect_set() : 将某个字段在一组数据中的所有值形成一个集合返回
select id,name,collect_set(…) from 表 group by id,name; 前面查什么,分组时就要用什么
explode() : 对数组字段炸裂 大爆炸,用法同上
表生成函数json_tuple
{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}
{"movie":"661","rate":"3","timeStamp":"978302109","uid":"1"}
{"movie":"914","rate":"3","timeStamp":"978301968","uid":"1"}
{"movie":"3408","rate":"4","timeStamp":"978300275","uid":"1"}
{"movie":"2355","rate":"5","timeStamp":"978824291","uid":"1"}
{"movie":"1197","rate":"3","timeStamp":"978302268","uid":"1"}
{"movie":"1287","rate":"5","timeStamp":"978302039","uid":"1"}
{"movie":"2804","rate":"5","timeStamp":"978300719","uid":"1"}
{"movie":"594","rate":"4","timeStamp":"978302268","uid":"1"}
{"movie":"919","rate":"4","timeStamp":"978301368","uid":"1"}
create table t_rating(line string)row format delimited;
load data local inpath '/root/rating.json' into table t_rating;
select * from t_rating ;
将一个JSON数据拆分成多个字段
select json_tuple(line,"movie","rate","timeStamp","uid")
as (movie,rate,ts,uid) from t_rating;