优化 Hive 的性能配置

-- Hive session-level performance settings.
--
-- Only four statements in this script are active: the execution engine, the
-- two vectorization switches, and automatic map-join conversion. Everything
-- else is kept as a commented-out reference.
--
-- All comments are full-line `--` comments starting at column 0. `#` is not
-- HiveQL comment syntax, and the Hive CLI only skips lines that begin with
-- `--`, so this form is safe for both `hive -f` and Beeline.

-- Execution engine and vectorization.
-- The original note listed tez, spark, impala and mapreduce as engine options.
-- NOTE(review): Hive documents mr, tez and spark as the values of
-- hive.execution.engine; Impala is a separate query engine - confirm intent.
set hive.execution.engine = spark;
set hive.vectorized.execution.enabled = true;
set hive.vectorized.execution.reduce.enabled = true;

-- Tuning tips collected from other people (all disabled).
-- set hive.exec.compress.output=true;
-- set mapred.output.compress=true;
-- set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
-- set io.compression.codecs=org.apache.hadoop.io.compress.GzipCodec;
-- set mapred.reduce.tasks=200;
-- set mapreduce.reduce.shuffle.memory.limit.percent=0.10;
-- set yarn.nodemanager.pmem-check-enabled=false;
-- set yarn.nodemanager.vmem-check-enabled=false;
-- set mapreduce.map.memory.mb=3072;
-- set mapreduce.reduce.memory.mb=3072;

-- Tuning tips from "ES" (all disabled).
-- NOTE(review): "ES" presumably means Elasticsearch - confirm.
-- set hive.exec.dynamic.partition = true;
-- set hive.exec.dynamic.partition.mode = nonstrict;
-- set hive.exec.max.dynamic.partitions=10000;
-- set hive.exec.max.dynamic.partitions.pernode=4000;
-- set hive.mapred.reduce.tasks.speculative.execution=false;

-- Tuning tips from books.
set hive.auto.convert.join=true;
-- set hive.exec.mode.local.auto=true;
-- Author's warning on the next setting: "too scary, do not use".
-- set hive.exec.parallel=true;

-- Tuning tips found online (disabled).
-- set hive.groupby.skewindata=true;

-- The author's own tuning choices (disabled here; these are the same three
-- settings that are active at the top of this script).
-- set hive.execution.engine = spark;
-- set hive.vectorized.execution.enabled = true;
-- set hive.vectorized.execution.reduce.enabled = true;
阅读更多
版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sinat_20174131/article/details/79970892
个人分类: DB tip
上一篇安装pytorch (win10)
下一篇sql统计字段
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

关闭
关闭