本地模式开启
set hive.exec.mode.local.auto=true;
set hive.exec.mode.local.auto.inputbytes.max=51234560;
set hive.exec.mode.local.auto.input.files.max=10;
可以不走mapreduce
set hive.fetch.task.conversion=more;
修改 hive-default.xml
<property>
<name>hive.fetch.task.conversion</name>
<value>more</value>
<description>
Expects one of [none, minimal, more].
Some select queries can be converted to single FETCH task minimizing latency.
Currently the query should be single sourced not having any subquery and should not have
any aggregations or distincts (which incurs RS), lateral views and joins.
0. none : disable hive.fetch.task.conversion
</description>
</property>
数据倾斜
set hive.map.aggr = true;
set hive.groupby.mapaggr.checkinterval = 100000;
set hive.groupby.skewindata = true;
set hive.exec.reducers.bytes.per.reducer=32123456;
SQL优化
SELECT count(id) FROM (SELECT id FROM table1 GROUP BY id) a;
SELECT a.id
FROM table1 a
LEFT JOIN table2 b ON (b.id <= 10 AND a.id = b.id);
SELECT a.id
FROM table2 a
RIGHT JOIN (SELECT id
FROM table1
WHERE id <= 10
) b ON a.id = b.id;
INSERT overwrite TABLE part_table2 PARTITION (p_time)
SELECT id, time, uid, keyword, url_rank, click_num, click_url, p_time
FROM part_table1;
动态分区
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=1000;
set hive.exec.max.dynamic.partitions.pernode=100;
set hive.exec.max.created.files=100000;
set hive.error.on.empty.partition=false;