HiveSQL 查询语句

最新推荐文章于 2024-08-07 10:49:21 发布

勾勾黄

最新推荐文章于 2024-08-07 10:49:21 发布

阅读量1.2k

点赞数

分类专栏：HiveSQL 常用语句 | 文章标签：hive、大数据

本文链接：https://blog.csdn.net/weixin_42189651/article/details/114679380

版权

HiveSQL 常用语句专栏收录该内容

3 篇文章 1 订阅

订阅专栏

一、基本查询（select…from…）

列查询

查看全部列，或只查看指定的某几列

-- Full-table query (* returns every column)
select *
from tableA;

-- Full-table query restricted to a single date partition.
-- `date` is a reserved keyword in Hive 1.2.0+, so the column name is back-quoted.
select *
from tableA
where `date` = 20210205;

-- Full-table query returning at most 100 rows
select *
from tableA
limit 100;

-- Query specific columns only
select
    uid,
    id_type
from tableA;

-- Query specific columns and rename them (explicit "as" marks the aliases)
select
    uid as col1,
    id_type as col2
from tableA;

-- Describe the table's columns
desc tableA;

使用 where 子句对查询结果进行过滤

-- Filter rows with a where clause.
-- `date` is a reserved keyword in Hive 1.2.0+, so the column name is back-quoted.
select *
from tableA
where `date` = 20210205
  and uid = "12345";

-- Rows within a date range (both ends inclusive)
select *
from tableA
where `date` >= 20210205
  and `date` <= 20210214;

-- Rows whose uid is not null
select *
from tableA
where uid is not null;

-- Logical operators and / or. In HiveSQL, and binds tighter than or (see the
-- author's separate post on this), so wrap or-conditions in parentheses when
-- combining them with and.
-- Logical AND
select *
from tableA
where `date` = 20210205
  and uid is not null;

-- Logical OR
select *
from tableA
where uid = "12345"
   or uid = "678910";

数据量查询 pv uv

使用distinct 进行去重

-- PV: count every row, without de-duplication
select
    count(*)
from tableA;

-- UV of a column: distinct drops duplicate uid values before counting
select
    count(distinct uid)
from tableA;

二、group by 语句

-- After group by: the set of id_type values seen for each uid
select
    uid,
    collect_set(id_type)
from tableA
group by uid;

-- After group by: how many non-null id_type rows each uid has
select
    uid,
    count(id_type)
from tableA
group by uid;

-- After group by: the largest id_type for each uid
select
    uid,
    max(id_type)
from tableA
group by uid;

使用 collect_set 可以查看某个取值对应的另一列的全部取值（结果会去重）；也可以使用 collect_list，但 collect_list 不会去重。

-- Collect all cate2 values of each cate1 group with collect_set
-- (collect_list can be used instead when duplicates should be kept).
-- `date` is a reserved keyword in Hive 1.2.0+, so the column name is back-quoted.
select
    cate1,
    collect_set(cate2)
from tableA
where `date` = 20210119
group by cate1;

三、join 语句

join 默认是内连接

-- A bare "join" keeps only tableA rows that have a tableB row with the same id and name
select
    tableA.id,
    tableA.name
from tableA
join tableB
    on tableA.id = tableB.id
    and tableA.name = tableB.name;

inner join 内连接
left join 左外连接
right join 右外连接
full join 满外连接

五、order by 排序语句

order by 默认升序

1). ASC(ascend) 升序(默认); DESC(descend) 降序

2). order by子句在select语句结尾


-- Employee rows sorted by salary, ascending (the default direction)
select *
from emp
order by sal;

-- Employee rows sorted by salary, descending
select *
from emp
order by sal desc;

-- Sorting on several columns:
-- department first, then salary, both ascending
select
    ename,
    deptno,
    sal
from emp
order by
    deptno,
    sal;

-- Sorting on a column alias:
-- order by twice the employee's salary
select
    ename,
    sal * 2 as twosal
from emp
order by twosal;

七、case 连续取值列分桶

按照tableA的amount列取值分桶

 -- Count the rows of tableA falling into each amount range.
 -- Each case yields 1 inside its range and NULL otherwise; count() skips NULLs,
 -- so every output column is the row count of one bucket.
 select
     count(case when amount < 100 then 1 end) as `[0,99]`,
     count(case when amount >= 100 and amount < 200 then 1 end) as `[100,199]`,
     count(case when amount >= 200 and amount < 400 then 1 end) as `[200,399]`,
     count(case when amount >= 400 and amount < 1000 then 1 end) as `[400,999]`,
     count(case when amount >= 1000 then 1 end) as `[1000,null]`
 from tableA;