一、com
2者都是用来把2个结果集进行上下合并的
要求2字段的数量和类型要一致。不过在hive中,类型一致的要求没那么严格
二、区别:union是去重 + 排序,union all是简单的堆叠且不排序,union all性能好
1.去重是要求select的所有字段都一样
2.排序是根据主键,如果没有主键,或者是在hive中,就根据select列表中的第一个字段正序排。一般都会手动指定
三、mod
1.数据准备
CREATE TABLE Person (
PersonID int,
LastName varchar(255),
FirstName varchar(255),
Address varchar(255),
City varchar(255)
);
INSERT INTO Person VALUES (1, 'Tom', 'Erichsen', 'Skagen 215, Stavanger 4006', 'Norway');
INSERT INTO Person VALUES (2, 'Tom', 'Erichsen', 'Skagen 214, Stavanger 4006', 'Norway');
INSERT INTO Person VALUES (3, 'Tom', 'Erichsen', 'Skagen 213, Stavanger 4006', 'Norway');
INSERT INTO Person VALUES (4, 'Tom', 'Erichsen', 'Skagen 212, Stavanger 4006', 'Norway');
INSERT INTO Person VALUES (5, 'Tom', 'Erichsen', 'Skagen 211, Stavanger 4006', 'Norway');
2.PersonID在左边,则根据PersonID正序排
SELECT PersonID,Address FROM Person where PersonID <=2
union
SELECT PersonID,Address FROM Person where PersonID >=2
;
1|Skagen 215, Stavanger 4006
2|Skagen 214, Stavanger 4006
3|Skagen 213, Stavanger 4006
4|Skagen 212, Stavanger 4006
5|Skagen 211, Stavanger 4006
3.Address 在左边,则根据Address 正序排
SELECT Address,PersonID FROM Person where PersonID <=2
union
SELECT Address,PersonID FROM Person where PersonID >=2
;
Skagen 211, Stavanger 4006|5
Skagen 212, Stavanger 4006|4
Skagen 213, Stavanger 4006|3
Skagen 214, Stavanger 4006|2
Skagen 215, Stavanger 4006|1