-- 1. Disable compression of Hive query output
-- hive.exec.compress.output controls whether the final output of a query is compressed.
SET hive.exec.compress.output=false;
-- Both the legacy (mapred.*) and the current (mapreduce.*) Hadoop property names
-- are set so the setting takes effect regardless of the Hadoop version in use.
SET mapred.output.compress=false;
SET mapreduce.output.fileoutputformat.compress=false;
-- 2. Enable Hive dynamic partitioning
SET hive.exec.dynamic.partition=true;
-- nonstrict: every partition column may be dynamic (strict mode requires at
-- least one static partition column).
SET hive.exec.dynamic.partition.mode=nonstrict;
-- Upper bound on dynamic partitions created by a single mapper/reducer node.
SET hive.exec.max.dynamic.partitions.pernode=500;
-- Upper bound on dynamic partitions created in total by one statement.
SET hive.exec.max.dynamic.partitions=5000;
-- 3. Export a table to comma-delimited text files on the local filesystem
-- (the original note describes the source table as RCFile — confirm against its DDL).
-- Compression is switched off first so the exported files are plain text.
SET hive.exec.compress.output=false;
SET mapred.output.compress=false;
SET mapreduce.output.fileoutputformat.compress=false;
-- NOTE: OVERWRITE replaces whatever already exists in the target directory.
-- Fields are written unquoted, so values that contain ',' will not round-trip as CSV.
INSERT OVERWRITE LOCAL DIRECTORY '/home/mzsip/lx/adl_fdt_app_similar'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT
    packageid,
    similarid,
    score,
    categoryid
FROM adl_fdt_app_similar;
-- 4. Disable automatic conversion of joins to map joins
SET hive.auto.convert.join=false;
-- To turn automatic map-join conversion back on afterwards, run:
--   SET hive.auto.convert.join=true;
-- 5. Number of CPU virtual cores requested for each map / reduce task
-- NOTE(review): 20 vcores per task is large; confirm the cluster's per-container
-- vcore limit allows it.
SET mapreduce.map.cpu.vcores=20;
SET mapreduce.reduce.cpu.vcores=20;
-- 6. Reducer sizing: amount of input data handled by each reducer
-- 268435456 bytes = 256 * 1024 * 1024 (256 MB) per reducer.
SET hive.exec.reducers.bytes.per.reducer=268435456;
-- Upper bound on the number of reducers Hive will estimate.
SET hive.exec.reducers.max=50;
-- A non-negative mapreduce.job.reduces fixes the reducer count; while it is set,
-- Hive uses this constant instead of the estimate driven by the two settings above.
SET mapreduce.job.reduces=10;