-- 配置参数说明 (Hive tuning parameter reference)
-- ===== Join optimization =====
-- Auto-convert common joins to map joins when the small side fits in memory.
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000000;  -- 10 MB combined small-table limit
set hive.mapjoin.smalltable.filesize=200000000;              -- 200 MB small-table file threshold

-- ===== Input small-file combining (~256 MB target splits) =====
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
set hive.hadoop.supports.splittable.combineinputformat=true;
set mapreduce.input.fileinputformat.split.maxsize=256000000;
set mapreduce.input.fileinputformat.split.minsize=256000000;
set mapreduce.input.fileinputformat.split.minsize.per.node=256000000;
set mapreduce.input.fileinputformat.split.minsize.per.rack=256000000;
-- Legacy (MRv1) aliases of the four split-size settings above, kept for older clusters.
set mapred.max.split.size=256000000;
set mapred.min.split.size=256000000;
set mapred.min.split.size.per.node=256000000;
set mapred.min.split.size.per.rack=256000000;

-- ===== Output small-file merging =====
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.size.per.task=256000000;
set hive.merge.smallfiles.avgsize=256000000;
set hive.merge.orcfile.stripe.level=false;
set hive.merge.rcfile.block.level=false;

-- ===== Parallel stage execution =====
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=16;

-- ===== Reducer sizing =====
set hive.exec.reducers.bytes.per.reducer=5120000000;  -- ~5 GB of input per reducer
set mapred.reduce.tasks=50;

-- ===== Task container resources =====
-- Heap (-Xmx) is kept below the container size to leave non-heap headroom.
set mapreduce.map.cpu.vcores=1;
set mapreduce.map.memory.mb=4096;
set mapreduce.map.java.opts=-Xmx3572M;
set mapreduce.reduce.cpu.vcores=1;
set mapreduce.reduce.memory.mb=4096;
set mapreduce.reduce.java.opts=-Xmx3572M;

-- ===== Data-skew handling =====
set hive.optimize.skewjoin=true;
set hive.groupby.skewindata=true;
set hive.map.aggr=true;

-- ===== Compression =====
set hive.exec.compress.intermediate=true;
-- FIX: original line was missing the terminating semicolon.
set mapred.map.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
set mapred.output.compression.type=BLOCK;

-- ===== File format and dynamic partitioning =====
set hive.default.fileformat=orc;
set hive.exec.dynamic.partition=true;
-- FIX: "nostrict" is not a valid value; Hive only accepts "strict" or "nonstrict".
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=100000;
set hive.exec.max.dynamic.partitions.pernode=10000;

-- ===== Vectorization and recursive input directories =====
set hive.vectorized.execution.enabled=true;
-- FIX: original line was missing the terminating semicolon.
set mapreduce.input.fileinputformat.input.dir.recursive=true;
-- OOM remediation guidance: if map/reduce tasks fail with out-of-memory errors,
-- double the CURRENT values (目前值 = "current value") of these parameters:
--   set mapreduce.map.cpu.vcores    = <current value> * 2
--   set mapreduce.map.memory.mb     = <current value> * 2
--   set mapreduce.map.java.opts     = <current value> * 2
--   set mapreduce.reduce.cpu.vcores = <current value> * 2
--   set mapreduce.reduce.memory.mb  = <current value> * 2
--   set mapreduce.reduce.java.opts  = <current value> * 2
-- 配置示例 (configuration examples)
-- demo1: baseline profile — 4 GB containers, ~256 MB splits.
-- Map-join conversion for small tables.
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000000;
set hive.mapjoin.smalltable.filesize=200000000;
-- Combine small input files into ~256 MB splits.
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
set hive.hadoop.supports.splittable.combineinputformat=true;
set mapreduce.input.fileinputformat.split.maxsize=256000000;
set mapreduce.input.fileinputformat.split.minsize.per.node=256000000;
set mapreduce.input.fileinputformat.split.minsize.per.rack=256000000;
-- Merge small output files after map-only and map-reduce jobs.
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.size.per.task=256000000;
set hive.merge.smallfiles.avgsize=256000000;
set hive.merge.orcfile.stripe.level=false;
set hive.merge.rcfile.block.level=false;
-- Run independent query stages in parallel.
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=16;
-- Legacy (MRv1) split-size names.
set mapred.max.split.size=256000000;
set mapred.min.split.size.per.node=256000000;
set mapred.min.split.size.per.rack=256000000;
-- ~5 GB of input per reducer.
set hive.exec.reducers.bytes.per.reducer=5000000000;
-- 4 GB containers; heap kept below the container limit.
set mapreduce.map.memory.mb=4096;
set mapreduce.map.java.opts=-Xmx3572M;
set mapreduce.reduce.memory.mb=4096;
set mapreduce.reduce.java.opts=-Xmx3572M;
-- Skew mitigation for joins and group-by.
set hive.optimize.skewjoin=true;
set hive.groupby.skewindata=true;
-- Compressed output; ORC as the default table format.
set mapred.output.compress=true;
set hive.exec.compress.output=true;
set hive.default.fileformat=orc;
-- demo2: large-scale profile — 8 GB containers, ~2.56 GB splits for aggressive small-file merging.
-- Map-join conversion for small tables.
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000000;
set hive.mapjoin.smalltable.filesize=200000000;
-- Combine small input files into very large (~2.56 GB) splits.
set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
set hive.hadoop.supports.splittable.combineinputformat=true;
set mapreduce.input.fileinputformat.split.maxsize=2560000000;
set mapreduce.input.fileinputformat.split.minsize.per.node=2560000000;
set mapreduce.input.fileinputformat.split.minsize.per.rack=2560000000;
-- Legacy (MRv1) split-size names.
set mapred.max.split.size=2560000000;
set mapred.min.split.size.per.node=2560000000;
set mapred.min.split.size.per.rack=2560000000;
-- Merge small output files up to ~2.56 GB per task.
set hive.merge.mapfiles=true;
set hive.merge.mapredfiles=true;
set hive.merge.size.per.task=2560000000;
set hive.merge.smallfiles.avgsize=2560000000;
set hive.merge.orcfile.stripe.level=false;
set hive.merge.rcfile.block.level=false;
-- Run independent query stages in parallel (wider than demo1).
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=32;
-- ~5 GB of input per reducer.
set hive.exec.reducers.bytes.per.reducer=5000000000;
-- 8 GB containers; heap kept below the container limit.
set mapreduce.map.memory.mb=8192;
set mapreduce.map.java.opts=-Xmx7186M;
set mapreduce.reduce.memory.mb=8192;
set mapreduce.reduce.java.opts=-Xmx7186M;
-- Skew mitigation for joins.
set hive.optimize.skewjoin=true;
-- Compressed output; ORC as the default table format.
set mapred.output.compress=true;
set hive.exec.compress.output=true;
set hive.default.fileformat=orc;