Hive Views
视图是一种通过在虚拟表中隐藏子查询、连接、函数来简化查询的逻辑结构
不存储数据或得到具体化
创建视图后,将立即冻结其架构。
如果删除或更改基础表,则查询视图将失败。
视图是只读的,不能用作LOAD/INSERT/ALTER的目标。
hive views 常用操作
建立视图支持cte,order by,limit,jion,ect。
create view view_name as select --创建视图
show tables;(show views after hive v2.2.0) --查找视图
show create table view_name --显示view 定义用
drop view view_name --删除view
alter view view_name set tblproperties('comment' = 'this is a view');
--更改视图属性
alter view view_name as select --更改视图定义
hive lateral view 侧视图
应用表生成函数,然后将函数输入和输出连接在一起。
横向视图外部将产生结果,甚至输出为空。
支持多级
经常用于规范化行或JSON解析器
desc formatted name --在 Table Type 一栏查看是 table or view
查看表中数据
0: jdbc:hive2://localhost:10000> select * from emp;
+-----------+-------------------------+----------------------------+-----------------------+----------------------------------------+
| emp.name | emp.work_place | emp.sex_age | emp.skills_score | emp.depart_title |
+-----------+-------------------------+----------------------------+-----------------------+----------------------------------------+
| chael | ["Nontreal","Toronto"] | {"sex":"Male","age":30} | {"DB":null} | {"Prcduct":["DeweloperLead"]} |
| will | ["Montreal"] | {"sex":"Male","age":35} | {"Perl":85} | {"Prodact":["Lead"],"Test":["LEAD"]} |
| Shelley | ["New York"] | {"sex":"female","age":27} | {"Python":80} | {"Test":["Lead"],"COE":["Architect"]} |
| Lucy | ["Vancouver"] | {"sex":"Female","age":57} | {"Sales":89,"HR":94} | {"Sales":["Lead"]} |
+-----------+-------------------------+----------------------------+-----------------------+----------------------------------------+
使用侧视图 explode就是将hive一行中复杂的array或者map结构拆分成多行
0: jdbc:hive2://localhost:10000> select * from emp lateral view explode(work_place) a as loc;
+-----------+-------------------------+----------------------------+-----------------------+----------------------------------------+------------+
| emp.name | emp.work_place | emp.sex_age | emp.skills_score | emp.depart_title | a.loc |
+-----------+-------------------------+----------------------------+-----------------------+----------------------------------------+------------+
| chael | ["Nontreal","Toronto"] | {"sex":"Male","age":30} | {"DB":null} | {"Prcduct":["DeweloperLead"]} | Nontreal |
| chael | ["Nontreal","Toronto"] | {"sex":"Male","age":30} | {"DB":null} | {"Prcduct":["DeweloperLead"]} | Toronto |
| will | ["Montreal"] | {"sex":"Male","age":35} | {"Perl":85} | {"Prodact":["Lead"],"Test":["LEAD"]} | Montreal |
| Shelley | ["New York"] | {"sex":"female","age":27} | {"Python":80} | {"Test":["Lead"],"COE":["Architect"]} | New York |
| Lucy | ["Vancouver"] | {"sex":"Female","age":57} | {"Sales":89,"HR":94} | {"Sales":["Lead"]} | Vancouver |
+-----------+-------------------------+----------------------------+-----------------------+----------------------------------------+------------+
简化信息 a为 lateral view 的别名,loc为结果集的别名
0: jdbc:hive2://localhost:10000> select name,loc from emp lateral view explode(work_place) a as loc;
+----------+------------+
| name | loc |
+----------+------------+
| chael | Nontreal |
| chael | Toronto |
| will | Montreal |
| Shelley | New York |
| Lucy | Vancouver |
+----------+------------+
创建 null 列 split()将字符串转化为字符串数组
0: jdbc:hive2://localhost:10000> select name,loc from emp lateral view outer explode(split(null,',')) a as loc;
+----------+-------+
| name | loc |
+----------+-------+
| chael | NULL |
| will | NULL |
| Shelley | NULL |
| Lucy | NULL |
+----------+-------+
Hive select(数据映射)
虚拟列(两个连续下划线,用于数据验证)输入文件名,它是映射任务的输入文件名。
INPUT FILE NAME --这是mapper任务的输入文件的名称。
BLOCK OFFSET INSIDE FlLE --哪个是当前的全局文件
Inner JOIN select * from a join b on a.k = b.k
lmplicit JOIN select * from a, b where a.k = b.k
Outer JOIN select * from a left join b where a.k = b.k
Cross JOIN select * from a join b where 1 = 1
Inequality JOIN (support after v2.2.0)
--与一侧的多个输入相结合
select * from a right join b on
(a.key + b.key >= 100 and a.key + b.key<= 102);
--没有引用的连接
select * from a full outer join b on
(a.value between 100 and 102) and
(a.value between 100 and 102);
in or not in
select * from a where z.col in (select col_b from b);
exists and not exists
select col from a where exists (select col_b from b where a.k=b.k)
Hive Jion --MAPJION
MAPJOIN语句表示仅通过map进行连接,而不使用reduce作业。MAPJOIN语句将小表中的所有数据读取到内存中并广播到所有映射。
一旦设置hive.auto.convert。如果可能的话,Hive会在运行时自动将join转换为MAPJOIN,而不是检查MAPJOINhint。这是现在的默认设置。
SELECT/*+MAPJOIN(employee)*/ emp.name, emph.sin_number
FROM employee emp JOIN employee_hr emph ON emp.name = emph.name;
MAPJOIN操作符不支持下列操作:
Use MAPJOIN after UNION ALL,LATERAL VIEW,GROUP BY/JOIN/SORTBY/CLUSTER BY/DISTRIBUTE BY
Use MAPJOIN before by UNION,JOIN and other MAPJOIN
Hive Set Ops (集合操作) - UNION
UNION ALL 在工会后保持重复
UNION 删除重复,支持从 v1.2.0 可以在顶级查询中使用。
所有子集数据必须具有相同的名称和类型。否则,隐式转换将完成,并且可能有运行时异常。
order by,sort by,cluster by,distribute by or limit 应用于结合后的整体效果
select key from
(select key from src1 order by key limit 10)
sub union all
select key from src2 order by key limit 10;
Hive Date Movement - LOAD
为了在Hive中移动数据,它使用了LOAD关键字。移动到这里意味着原始数据被移动到目标表/分区,并且不再存在于原始位置。
LOAD DATA LOCAL INPATH '/home/dayongd/Downloads/employee_hr.txt'
OVERNRITE INTO TABLE employee_hr;
LOAD DATA LOCAL INPATH '/home/dayongd/Downloads/employee.txt'
OVERNRITE INTO TABLE employee_partitioned PARTITION (year=2014,month-12);
LOAD DATA INPATH '/user/dayongd/employee/employee.txt'
OVERWRITE INTO TABLE employee;
LOAD DATA INPATH 'hdfs://ldfs_host]:8020/user/dayongd/employee/employee.txt'
OVERRITE INTO TABLE employee;
LOCAL 本地指定文件位于主机中。否则,文件位于HDFS中。
OVERWRITE 用于决定是否追加或替换现有数据。
Hive insert to table
--insert from constant failed
insert overwrite table test select 'will';
--插入查询
insert into employee select * from ctas_employee;
--具有更好的性能的多次插入
from ctas_employee
insert overwrite table employee select *
insert overwrite table employee_internal select *;
--插入分区
from ctas_partitioned
insert overwrite table employee partition(year,month)
select *,'2018','09';
--按指定列列表插入,未指定的为空
insert into employee(name) select "John" from test limit 1
--按指定值插入
insert into employee(name) values ('Judy'),('john');
insert to file example
插入到本地文件、hdfs和表中
from ctas_employee
insert overwrite local directory '/tmp/out1' select *
insert overwrite directory '/tmp/out2' select *
insert overwrite table employee_internal select *;
插入指定格式的数据
insert overwrite directory '/tmp/out3'
row format delimited fields terminated by ',' --分割符
select * from ctas_employee
从表中获取文件的其他方法
hdfs dfs -getmerge <table_file_path>
hive date exchange
IMPORT和EXPORT语句用于数据迁移
所有数据和元数据的导出/导入不需要数据库。
EXPORT语句导出文件中名为data和metadata 之中的一个 called_metadata 文件
EXPORT TABLE employee TO '/tmp/output3';
EXPORT TABLE employee_partitioned partition (year=2014, month=11) TO '/tmp/output5';
导出之后,我们可以手动地将导出的文件复制到另一个HDFS。然后,用lMPORT语句导入它们。
IMPORT TABLE empolyee_imported FROM '/tmp/output3';
IMPORT TABLE employee_partitioned_imported FROM '/tmp/output5';