-- Hive session tuning: skew handling, Snappy compression, parallel execution,
-- CBO and vectorization. Duplicate block (former L22-L41) removed; boolean
-- typo "ture" corrected to "true" so local-mode auto-switching actually works.

-- CLI display
set hive.cli.print.current.db=true;
set hive.cli.print.header=true;

-- Run small jobs in local mode automatically (was misspelled "ture",
-- which Hive rejects as an invalid boolean value)
set hive.exec.mode.local.auto=true;

-- Data-skew mitigation for GROUP BY and joins
set hive.map.aggr=true;
set hive.groupby.skewindata=true;
set hive.optimize.skewjoin=true;
set hive.optimize.skewjoin.compiletime=true;

-- Snappy compression for intermediate and final output
set hive.exec.compress.output=true;
set hive.exec.compress.intermediate=true;
set mapred.map.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set mapred.output.compression.type=BLOCK;

-- Parallel stage execution, small-file merging, map-side join conversion
set hive.exec.parallel=true;
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.auto.convert.join=true;

-- Cost-based optimizer and vectorized execution
set hive.cbo.enable=true;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reduce.enabled=true;

-- JVM reuse across tasks and index-based predicate pushdown
set mapred.job.reuse.jvm.num.tasks=10;
set hive.optimize.index.filter=true;

-- Cap the number of reducers per job
set hive.exec.reducers.max=50;
-- 具体为什么要做这些配置，请查看官网配置：
-- https://cwiki.apache.org/confluence/display/Hive/Configuration+Properties
-- 我总结一下吧，效果最明显的就是：
-- 1. tez 引擎或者是 spark 引擎
-- 2. 使用压缩和使用 ORC 文件格式
-- 3. 剩余的就比较分散了