数据查询
select [all | distinct] select_expr, ...
from table_reference
[where where_condition]
[group by col_list]
[order by col_list]
[cluster by col_list | [distribute by col_list] [sort by col_list]]
[limit number]
用正则表达式指定列(正则需用反引号括起;下例选出除 ds、dh 之外的所有列;Hive 0.13 及以上需先 set hive.support.quoted.identifiers=none):
select `(ds|dh)?+.+` from sales
多路group by
from pv_users
insert overwrite table pv_gender_sum
select pv_users.gender, count(distinct pv_users.userid)
group by pv_users.gender
insert overwrite directory '/路径'
select pv_users.age, count(distinct pv_users.userid)
group by pv_users.age
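order by、sort by、distribute by、cluster by 的区别
order by:全局排序(所有数据经由单个reducer)
sort by:只保证每个reducer内部的输出有序,不保证全局有序
distribute by:将指定列取值相同的行分发到同一个reducer
cluster by col = distribute by col + sort by col(两者作用于相同的列)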
JOIN
内关联:select a.* from a join b on (a.id=b.id)
外关联:select a.* from a left outer join b on (a.id=b.id)
select [all | distinct] select_expr, ...
from table_reference
[where where_condition]
[group by col_list]
[order by col_list]
[cluster by col_list | [distribute by col_list] [sort by col_list]]
[limit number]
用正则表达式指定列(正则需用反引号括起;下例选出除 ds、dh 之外的所有列;Hive 0.13 及以上需先 set hive.support.quoted.identifiers=none):
select `(ds|dh)?+.+` from sales
多路group by
from pv_users
insert overwrite table pv_gender_sum
select pv_users.gender, count(distinct pv_users.userid)
group by pv_users.gender
insert overwrite directory '/路径'
select pv_users.age, count(distinct pv_users.userid)
group by pv_users.age
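order by、sort by、distribute by、cluster by 的区别
order by:全局排序(所有数据经由单个reducer)
sort by:只保证每个reducer内部的输出有序,不保证全局有序
distribute by:将指定列取值相同的行分发到同一个reducer
cluster by col = distribute by col + sort by col(两者作用于相同的列)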
JOIN
内关联:select a.* from a join b on (a.id=b.id)
外关联:select a.* from a left outer join b on (a.id=b.id)