1. 创建表
CREATE TABLE employees (
name STRING,
salary FLOAT,
subordinates ARRAY<STRING>,
deductions MAP<STRING, FLOAT>,
address STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
)
PARTITIONED BY (country STRING, state STRING)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
COLLECTION ITEMS TERMINATED BY '|'
MAP KEYS TERMINATED BY ':';
数据内容:
John Doe,100000.0,MarySmith|Todd Jones,Federal Taxes:.2|State Taxes:.05|Insurance:.1,1 MichiganAve.|Chicago|IL|60600
MarySmith,80000.0,Bill King,Federal Taxes:.2|State Taxes:.05|Insurance:.1,100Ontario St.|Chicago|IL|60601
ToddJones,70000.0,,Federal Taxes:.15|State Taxes:.03|Insurance:.1,200 ChicagoAve.|Oak Park|IL|60700
BillKing,60000.0,,Federal Taxes:.15|State Taxes:.03|Insurance:.1,300 ObscureDr.|Obscuria|IL|60100
2. 基本查询:
查询整个表:
select * from employees;
查询单个字段:
select name from employees;
查询数组:
select subordinates from employees;
select subordinates[0] from employees;
查询MAP:
select deductions from employees;
select deductions['Federal Taxes'] from employees;
查询结构体:
select address from employees;
select address.street from employees;
使用分区过滤:
select * from employees where country = 'CHINDA';
可以使用任意字段放在where 谓词中
3. 数据函数,计算数值
select bigint(salary) from employees;
4. 聚合函数
select sum(salary) from employees group by country;
select sum(salary) from employees group by address.street;
5. 表生成函数,一个字段裂成多行
select explode(subordinates) as sub from employees;
另外更多内置函数,可查询文档
6. 嵌套查询
select tmp_table.name from (select name, salary from employees) tmp_table;
7. case when
select sum(case when country = 'US' then salary end) as cc from employees group by country;
8. sort by 和 order by
order by 是正常排序
sort by 会现在每个reducer 中先执行排序
9. distribute 按某个字段分区后 在每个reducer上排序
select country, state, name, salary from employees distribute by country sort by country, state;