hive之DML

最新推荐文章于 2023-05-15 16:06:47 发布

Wuyikkk

最新推荐文章于 2023-05-15 16:06:47 发布

阅读量123

点赞数

分类专栏： hive

本文链接：https://blog.csdn.net/Wuyikkk/article/details/113761596

版权

hive 专栏收录该内容

4 篇文章 0 订阅

订阅专栏

一、DML

1. 数据导入

--从本地磁盘或者DHFS导入数据
load data [local] inpath '/opt/module/datas/student.txt' [overwrite] into table student [partition (partcol1=val1,…)];

--例子
load data local inpath '/opt/module/datas/student.txt' overwrite into table student;

--先在hdfs://hadoop102:8020/xxx文件夹上传一份student.txt
--HDFS的导入是移动，而本地导入是复制
load data inpath '/xxx/student.txt' overwrite into table student;

--Insert导入
insert into table student select id, name from stu_par where class="01";

建表时候用as select，查询的结果会添加到新创建的表中

--建表时候通过location加载
--先在hdfs://hadoop102:8020/xxx文件夹上传一份student.txt
create external table student2
(id int, name string)
row format delimited fields terminated by '\t'
location '/xxx';

2. 数据导出

--Insert导出
insert overwrite local directory '/opt/module/datas/export/student'
select * from student;

--带格式导出
insert overwrite local directory '/opt/module/datas/export/student1'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select * from student;

#bash命令行导出
hive -e "select * from default.student;" > /opt/module/datas/export/test.txt

--整张表export到HDFS
export table student to '/export/student';

--从导出结果导入到Hive
import table student3 from '/export/student';

3. 数据删除

--只删表数据，不删表本身
truncate table student;

二、基本查询

--建立员工表
create table if not exists emp(
empno int,
ename string,
job string,
mgr int,
hiredate string, 
sal double, 
comm double,
deptno int)
row format delimited fields terminated by '\t';

--建立部门表
create table if not exists dept(
deptno int,
dname string,
loc int
)
row format delimited fields terminated by '\t';

--导入数据
load data local inpath '/opt/module/datas/dept.txt' into table
dept;
load data local inpath '/opt/module/datas/emp.txt' into table emp;

--全表查询
select * from emp;

--查询某些列
select empno, ename from emp;

--起别名
select ename as name from emp;
--as可以省略
select ename name from emp;

--运算符
select ename, sal + 10 from emp;

--UDF函数
select substring(ename, 1, 1) from emp;
--UDAF函数
select count(*) from emp;

--limit，取前几行
select * from emp limit 5;

三、条件过滤

--查询工资大于1000的人
select * from emp where sal > 1000;

--通配符字符串匹配 % _
--以A开头的员工
select * from emp where ename like "A%";

--正则匹配
--以A开头的员工
select * from emp where ename rlike "^A";

简单正则

一般字符匹配自己
^ 匹配一行开头 ^R 以R开头
$ 匹配一行结束 R$ 以R结尾
. 匹配任意字符 ^.$ 一行只有一个字符
* 前一个子式匹配零次或多次
[] 匹配一个范围内的任意字符
\ 转义

--与或非
select * from emp where empno = 30 and sal > 1000;

四、分组

--计算emp表每个部门的平均工资
select deptno, avg(sal) aa from emp group by deptno;

--分组过滤
--计算部门平均工资大于2000的部门
select deptno, avg(sal) aa from emp group by deptno having aa>2000;

五、连接

--查询员工编号，姓名以及部门所在名称
select
    e.empno,
    e.ename,
    d.dname
from
    emp e
join
    dept d
on
    e.deptno=d.deptno;

--多表连接
SELECT e.ename, d.dname, l.loc_name
FROM   emp e 
JOIN   dept d
ON     d.deptno = e.deptno 
JOIN   location l
ON     d.loc = l.loc;

六、排序

--按照工资降序排序(全局排序)
select *
from emp
order by sal desc;

--多条件排序，先按部门排序，再按工资排序
select *
from emp
order by
deptno asc,
sal desc;

--一般需求不会要求给所有的数据排序，而要求求前几
--求工资前10的人,Map会先求局部前10
select *
from emp
order by sal desc
limit 10;

--还有一种可能，我们只需要看大概的数据趋势，不需要全排序
--Hive的局部排序
select *
from emp
sort by empno desc;

--指定局部排序的分区字段
select * from emp
distribute by empno
sort by sal desc;

--如果分区和排序的字段一样，我们可以用cluster by代替
select * from emp distribute by empno sort by empno;
select * from emp cluster by empno;

七、分桶

--创建分桶表
create table stu_buck(id int, name string)
clustered by(id) 
into 4 buckets
row format delimited fields terminated by '\t';

--向分桶表中插入数据
load data local inpath '/opt/module/datas/student.txt' into table stu_buck;

--tablesample是抽样语句，语法：TABLESAMPLE(BUCKET x OUT OF y) 
select * from stu_buck tablesample(bucket 1 out of 4 on id);

Wuyikkk

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
hive之DML

一、基本查询--建立员工表create table if not exists emp(empno int,ename string,job string,mgr int,hiredate string, sal double, comm double,deptno int)row format delimited fields terminated by '\t';--建立部门表create table if not exists dept(deptno int,dname
复制链接

扫一扫

专栏目录