SELECT … FROM Clauses
普通SELECT
hive> SELECT name, salary FROM employees;
hive> SELECT e.name, e.salary FROM employees e;数组SELECT
SELECT name, subordinates[0] FROM employees;
MAP SELECT
SELECT name, deductions[“State Taxes”] FROM employees;
STRUCT SELECT
hive> SELECT name, address.city FROM employees;
正则表达式SELECT
hive> SELECT symbol,
price.*
FROM stocks;
列值计算
hive> SELECT upper(name), salary, deductions[“Federal Taxes”],
round(salary * (1 - deductions[“Federal Taxes”])) FROM employees;
函数
聚合函数
hive> SELECT count(*), avg(salary) FROM employees;
可以通过 SET hive.map.aggr=true; 改善性能, 但是这个设置会消耗更多的内存
DISTINCT SELECT
hive> SELECT count(DISTINCT ymd), count(DISTINCT volume) FROM stocks;
Table generating functions
与聚合函数作用相反,会把某一个列值扩展为多行或多列
hive> SELECT explode(subordinates) AS sub FROM employees
当使用Table generating functions时, 需要使用表别名
其它内置函数: 字符串处理
LIST语句
hive> SELECT upper(name), salary, deductions[“Federal Taxes”],
round(salary * (1 - deductions[“Federal Taxes”])) FROM employees
LIMIT 2;
列别名
hive> SELECT upper(name), salary, deductions[“Federal Taxes”] as fed_taxes,
round(salary * (1 - deductions[“Federal Taxes”])) as salary_minus_fed_taxes
FROM employees LIMIT 2;
嵌套SELECT
hive> FROM (
SELECT upper(name), salary, deductions[“Federal Taxes”] as fed_taxes,
round(salary * (1 - deductions[“Federal Taxes”])) as salary_minus_fed_taxes
FROM employees
) e
SELECT e.name, e.salary_minus_fed_taxes
WHERE e.salary_minus_fed_taxes > 70000;
CASE … WHEN … THEN语句
hive> SELECT name, salary,
CASE
WHEN salary < 50000.0 THEN ‘low’
WHEN salary >= 50000.0 AND salary < 70000.0 THEN ‘middle’
WHEN salary >= 70000.0 AND salary < 100000.0 THEN ‘high’
ELSE ‘very high’
END AS bracket FROM employees;
不调用MapReduce的SELECT
SELECT * FROM employees
WHERE country = ‘US’ AND state = ‘CA’
LIMIT 100;
设置本地模式,不调用mapreduce
set hive.exec.mode.local.auto=true;
Predicate Operators
浮点数比较
hive> SELECT name, salary, deductions[‘Federal Taxes’] FROM employees
WHERE deductions[‘Federal Taxes’] > cast(0.2 AS FLOAT);