记录一次hive优化的全过程
因为涉及公司隐私,本文所有的sql被精简化,加上脱敏处理。
第一版的sql:
set hive.exec.parallel=true;
set mapred.max.split.size=134217728;
set mapred.min.split.size.per.node=134217728;
set mapred.min.split.size.per.rack=134217728;
set hive.exec.reducers.bytes.per.reducer=1073741824;
set hive.exec.compress.output=true;
set hive.text.output.compress.split=true;
drop table if exists wangzy_broker_detail_daily_tmp01;
create table wangzy_broker_detail_daily_tmp01 as
select
a.broker_id,
b.broker_id,
c.broker_id,
d.broker_id
e.user_id,
f.broker_id,
g.broker_id,
h.broker_id,
i.broker_id
from dm_qqq_broker_detail_basis_daily a
left outer join dm_qqq_broker_detail_combo_daily b on a.broker_id=b.broker_id and b.cal_dt = ${dealDate} and b.broker_id>0
left outer join dm_qqq_broker_detail_account_daily c on a.broker_id=c.broker_id and c.cal_dt = ${dealDate} and c.broker_id>0
left outer join dm_aaa_broker_detail_normal_daily d on a.broker_id=d.broker_id and d.cal_dt = ${dealDate} and d.broker_id>0
left outer join dm_bbb_broker_detail_normal_daily e on a.user_id=e.user_id and e.cal_dt = ${dealDate} and coalesce(e.user_id,0)>0
left outer join dm_qqq_broker_detail_link_daily f on a.broker_id=f.broker_id and f.cal_dt = ${dealDate} and f.broker_id>0
left outer join (
select
user_id as broker_id
from(
select
user_id
from dw_new_prop_base_esf
where cal_dt=${dealDate} and state in(1,2) and substr(add_date,1,10)=${dealDate} and COALESCE(user_id,0)>0
union ALL
select
user_id
from dw_new_prop_base_zf
where cal_dt=${dealDate} and state in(1,2) and substr(add_date,1,10)=${dealDate} and COALESCE(user_id,0)>0
union ALL
select
user_id
from dw_new_prop_base_sydc
where cal_dt=${dealDate} and state in(1,2) and substr(add_date,1,10)=${dealDate} and COALESCE(user_id,0)>0
)aa group by user_id
) g on a.broker_id=g.broker_id
left outer join (
select
broker_id
from dm_aaa_user_coupon_daily
where cal_dt=${dealDate}
group by broker_id
) h on a.broker_id=h.broker_id and h.broker_id>0
left outer join (
select
broker_id
from dm_house_daily
where cal_dt=${dealDate}
group by broker_id
) i on a.broker_id=i.broker_id and i.broker_id>0
left outer join da_qqq_broker_quality_daily j on a.broker_id = j.broker_id and j.cal_dt = ${dealDate}
where a.cal_dt = ${dealDate};
use hdp_anjuke_dw_db;
alter table dm_qqq_broker_detail_v2_daily drop partition(cal_dt = ${dealDate});
insert overwrite table dm_broker_detail_daily partition(cal_dt = ${dealDate})
select
a.*
from hdp_anjuke_dw_stage.wangzy_broker_detail_daily_tmp01 a
join dw_qqq_account_city_config_daily cfg on a.city_id=cfg.city_id_ajk and cfg.cal_dt=${dealDate}
第一次优化:开启mapjoin
本次sql运行时长在2小时46分,肯定是一个待优化的sql,所以首先从日志入手。
2020-10-15 10:43:27 - insert overwrite table dm_qqq_broker_detail_v2_daily partition(cal_dt = '2020-10-14')
2020-10-15 10:43:27 - select
2020-10-15 10:43:27 - a.*
2020-10-15 10:43:27 - from wangzy_broker_detail_daily_tmp01 a
2020-10-15 10:43:27 - join dw_qwt_account_city_config_daily cfg on a.city_id=cfg.city_id_ajk and cfg.cal_dt='2020-10-14';
2020-10-15 10:43:27 - INFO HiveRunner - This is not a VIEW JOB...
2020-10-15 10:43:27 - INFO CommandLogger - ls: cannot access /usr/lib/software/spark/lib/spark-assembly-*.jar: No such file or directory
2020-10-15 10:43:29 - INFO CommandLogger - 20/10/15 10:43:29 WARN conf.HiveConf main: HiveConf of name hive.metastore.local does not exist
2020-10-15 10:43:29 - INFO CommandLogger - 20/10/15 10:43:29 WARN conf.HiveConf main: HiveConf of name hive.files.umask.value does not exist
2020-10-15 10:43:29 - INFO CommandLogger -
2020-10-15 10:43:29 - INFO CommandLogger - Logging initialized using configuration in file:/usr/lib/software/apache-hive-1.1.0U16.1-cdh5.4.4-bin/conf/hive-log4j.properties
2020-10-15 10:43:29 - INFO CommandLogger - SLF4J: Class path contains multiple SLF4J bindings.
2020-10-15 10:43:29 - INFO CommandLogger - SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
2020-10-15 10:43:29 - INFO CommandLogger - SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
2020-10-15 10:43:37 - INFO CommandLogger - OK
2020-10-15 10:43:37 - INFO CommandLogger - Time taken: 0.683 seconds
2020-10-15 10:43:39 - INFO CommandLogger - Query ID = hadoop_20201015104343_10a0c26d-63ab-4925-abc7-2830ca650e5f
2020-10-15 10:43:39 - INFO CommandLogger - Total jobs = 3
2020-10-15 10:43:39 - INFO CommandLogger - Launching Job 1 out of 3
2020-10-15 10:43:39 - INFO CommandLogger - Number of reduce tasks not specified. Estimated from input data size: 2
2020-10-15 10:43:39 - INFO CommandLogger - In order to change the average load for a reducer (in bytes):
2020-10-15 10:43:39 - INFO CommandLogger - set hive.exec.reducers.bytes.per.reducer=<number>
2020-10-15 10:43:39 - INFO CommandLogger - In order to limit the maximum number of reducers:
2020-10-15 10:43:39 - INFO CommandLogger - set hive.exec.reducers.max=<number>
2020-10-15 10:43:39 - INFO CommandLogger - In order to set a constant number of reducers:
2020-10-15 10:43:39 - INFO CommandLogger - set mapreduce.job.reduces=<number>
2020-10-15 10:43:45 - INFO CommandLogger - Starting Job = job_1591776234013_38784236, Tracking URL = http://wq-yarn1-rm2.58dns.org:9088/proxy/application_1591776234013_38784236/
2020-10-15 10:43:45 - INFO CommandLogger - Kill Command = /usr/lib/software/hadoop/bin/hadoop job -kill job_1591776234013_38784236
2020-10-15 10:45:02 - INFO CommandLogger - Hadoop job information for Stage-1: number of mappers: 7; number of reducers: 2
2020-10-15 10:45:02 - INFO CommandLogger - 2020-10-15 10:45:02,110 Stage-1 map = 0%, reduce = 0%
2020-10-15 10:45:13 - INFO CommandLogger - 2020-10-15 10:45:13,676 Stage-1 map = 14%, reduce = 0%, Cumulative CPU 3.34 sec
2020-10-15 10:45:35 - INFO CommandLogger - 2020-10-15 10:45:35,530 Stage-1 map = 15%, reduce = 0%, Cumulative CPU 175.21 sec
2020-10-15 10:46:35 - INFO CommandLogger - 2020-10-15 10:46:35,729 Stage-1 map = 15%, reduce = 0%, Cumulative CPU 590.7 sec
2020-10-15 10:47:13 - INFO CommandLogger - 2020-10-15 10:47:13,956 Stage-1 map = 19%, reduce = 0%, Cumulative CPU 848.16 sec
2020-10-15 10:47:33 - INFO CommandLogger - 2020-10-15 10:47:33,604 Stage-1 map = 24%, reduce = 0%, Cumulative CPU 978.12 sec
2020-10-15 10:47:45 - INFO CommandLogger - 2020-10-15 10:47:45,980 Stage-1 map = 26%, reduce = 0%, Cumulative CPU 1057.46 sec
2020-10-15 10:47:59 - INFO CommandLogger - 2020-10-15 10:47:59,482 Stage-1 map = 31%, reduce = 0%, Cumulative CPU 1152.96 sec
2020-10-15 10:48:13 - INFO CommandLogger - 2020-10-15 10:48:13,918 Stage-1 map = 33%, reduce = 0%, Cumulative CPU 1246.33 sec
2020-10-15 10:48:21 - INFO CommandLogger - 2020-10-15 10:48:21,137 Stage-1 map = 35%, reduce = 0%, Cumulative CPU 1298.7 sec
2020-10-15 10:49:21 - INFO CommandLogger - 2020-10-15 10:49:21,958 Stage-1 map = 35%, reduce = 0%, Cumulative CPU 1708.65 sec
2020-10-15 10:49:40 - INFO CommandLogger - 2020-10-15 10:49:40,496 Stage-1 map = 37%, reduce = 0%, Cumulative CPU 1835.72 sec
2020-10-15 10:49:55 - INFO CommandLogger - 2020-10-15 10:49:55,958 Stage-1 map = 39%, reduce = 0%, Cumulative CPU 1941.92 sec
2020-10-15 10:50:08 - INFO CommandLogger - 2020-10-15 10:50:08,316 Stage-1 map = 42%, reduce = 0%, Cumulative CPU 2023.94 sec
2020-10-15 10:50:43 - INFO CommandLogger - 2020-10-15 10:50:43,368 Stage-1 map = 50%, reduce = 0%, Cumulative CPU 2263.36 sec
2020-10-15 10:50:47 - INFO CommandLogger - 2020-10-15 10:50:47,504 Stage-1 map = 51%, reduce = 0%, Cumulative CPU 2236.66 sec
2020-10-15 10:50:49 - INFO CommandLogger - 2020-10-15 10:50:49,564 Stage-1 map = 54%, reduce = 0%, Cumulative CPU 2304.41 sec
2020-10-15 10:50:53 - INFO CommandLogger - 2020-10-15 10:50:53,695 Stage-1 map = 58%, reduce = 0%, Cumulative CPU 2339.7 sec
2020-10-15 10:51:54 - INFO CommandLogger - 2020-10-15 10:51:54,605 Stage-1 map = 62%, reduce = 0%, Cumulative CPU 2671.11 sec
2020-10-15 10:51:57 - INFO CommandLogger - 2020-10-15 10:51:57,702 Stage-1 map = 65%, reduce = 0%, Cumulative CPU 2687.17 sec
2020-10-15 10:52:06 - INFO CommandLogger - 2020-10-15 10:52:06,960 Stage-1 map = 67%, reduce = 0%, Cumulative CPU 2735.25 sec
2020-10-15 10:52:16 - INFO CommandLogger - 2020-10-15 10:52:16,223 Stage-1 map = 72%, reduce = 0%, Cumulative CPU 2733.19 sec
2020-10-15 10:52:18 - INFO CommandLogger - 2020-10-15 10:52:18,298 Stage-1 map = 73%, reduce = 0%, Cumulative CPU 2736.4 sec
2020-10-15 10:52:21 - INFO CommandLogger - 2020-10-15 10:52:21,384 Stage-1 map = 75%, reduce = 0%, Cumulative CPU 2750.12 sec
2020-10-15 10:52:31 - INFO CommandLogger - 2020-10-15 10:52:31,662 Stage-1 map = 80%, reduce = 0%, Cumulative CPU 2796.53 sec
2020-10-15 10:52:37 - INFO CommandLogger - 2020-10-15 10:52:37,830 Stage-1 map = 82%, reduce = 0%, Cumulative CPU 2817.78 sec
2020-10-15 10:52:42 - INFO CommandLogger - 2020-10-15 10:52:42,980 Stage-1 map = 84%, reduce = 0%, Cumulative CPU 2831.15 sec
2020-10-15 10:53:43 - INFO CommandLogger - 2020-10-15 10:53:43,714 Stage-1 map = 84%, reduce = 0%, Cumulative CPU 2958.42 sec
2020-10-15 10:54:04 - INFO CommandLogger - 2020-10-15 10:54:04,284 Stage-1 map = 88%, reduce = 0%, Cumulative CPU 2999.64 sec
2020-10-15 10:54:07 - INFO CommandLogger - 2020-10-15 10:54:07,367 Stage-1 map = 90%, reduce = 0%, Cumulative CPU 3005.6 sec
2020-10-15 10:54:16 - INFO CommandLogger - 2020-10-15 10:54:16,631 Stage-1 map = 93%, reduce = 0%, Cumulative CPU 3023.81 sec
2020-10-15 10:55:17 - INFO CommandLogger - 2020-10-15 10:55:17,450 Stage-1 map = 93%, reduce = 0%, Cumulative CPU 3101.93 sec
2020-10-15 10:55:21 - INFO CommandLogger - 2020-10-15 10:55:21,570 Stage-1 map = 98%, reduce = 0%, Cumulative CPU 3109.75 sec
2020-10-15 10:55:30 - INFO CommandLogger - 2020-10-15 10:55:30,823 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 3119.14 sec
2020-10-15 10:55:50 - INFO CommandLogger - 2020-10-15 10:55:50,564 Stage-1 map = 100%, reduce = 67%, Cumulative CPU 3148.68 sec
2020-10-15 10:56:37 - INFO CommandLogger - 2020-10-15 10:56:37,250 Stage-1 map = 100%, reduce = 68%, Cumulative CPU 3280.13 sec
2020-10-15 10:57:37 - INFO CommandLogger - 2020-10-15 10:57:37,992 Stage-1 map = 100%, reduce = 68%, Cumulative CPU 3474.28 sec
2020-10-15 10:57:39 - INFO CommandLogger - 2020-10-15 10:57:39,020 Stage-1 map = 100%, reduce = 69%, Cumulative CPU 3487.24 sec
2020-10-15 10:58:39 - INFO CommandLogger - 2020-10-15 10:58:39,876 Stage-1 map = 100%, reduce = 69%, Cumulative CPU 3696.85 sec
2020-10-15 10:59:07 - INFO CommandLogger - 2020-10-15 10:59:07,920 Stage-1 map = 100%, reduce = 70%, Cumulative CPU 3790.21 sec
2020-10-15 11:00:07 - INFO CommandLogger - 2020-10-15 11:00:07,945 Stage-1 map = 100%, reduce = 70%, Cumulative CPU 4016.91 sec
2020-10-15 11:00:33 - INFO CommandLogger - 2020-10-15 11:00:33,773 Stage-1 map = 100%, reduce = 71%, Cumulative CPU 4109.18 sec
2020-10-15 11:01:33 - INFO CommandLogger - 2020-10-15 11:01:33,541 Stage-1 map = 100%, reduce = 72%, Cumulative CPU 4316.44 sec
2020-10-15 11:02:34 - INFO CommandLogger - 2020-10-15 11:02:34,232 Stage-1 map = 100%, reduce = 72%, Cumulative CPU 4510.48 sec
2020-10-15 11:02:58 - INFO CommandLogger - 2020-10-15 11:02:58,887 Stage-1 map = 100%, reduce = 73%, Cumulative CPU 4600.11 sec
2020-10-15 11:03:59 - INFO CommandLogger - 2020-10-15 11:03:59,564 Stage-1 map = 100%, reduce = 73%, Cumulative CPU 4806.97 sec
2020-10-15 11:05:00 - INFO CommandLogger - 2020-10-15 11:05:00,238 Stage-1 map = 100%, reduce = 73%, Cumulative CPU 5013.44 sec
2020-10-15 11:05:02 - INFO CommandLogger - 2020-10-15 11:05:02,302 Stage-1 map = 100%, reduce = 74%, Cumulative CPU 5022.53 sec
2020-10-15 11:06:02 - INFO CommandLogger - 2020-10-15 11:06:02,474 Stage-1 map = 100%, reduce = 74%, Cumulative CPU 5240.58 sec
2020-10-15 11:07:02 - INFO CommandLogger - 2020-10-15 11:07:02,569 Stage-1 map = 100%, reduce = 74%, Cumulative CPU 5470.51 sec
2020-10-15 11:08:03 - INFO CommandLogger - 2020-10-15 11:08:03,427 Stage-1 map = 100%, reduce = 74%, Cumulative CPU 5723.89 sec
2020-10-15 11:08:36 - INFO CommandLogger - 2020-10-15 11:08:36,313 Stage-1 map = 100%, reduce = 75%, Cumulative CPU 5856.92 sec
2020-10-15 11:09:36 - INFO CommandLogger - 2020-10-15 11:09:36,891 Stage-1 map = 100%, reduce = 75%, Cumulative CPU 6086.42 sec
2020-10-15 11:10:15 - INFO CommandLogger - 2020-10-15 11:10:15,957 Stage-1 map = 100%, reduce = 76%, Cumulative CPU 6232.2 sec
2020-10-15 11:11:16 - INFO CommandLogger - 2020-10-15 11:11:16,611 Stage-1 map = 100%, reduce = 76%, Cumulative CPU 6478.2 sec
2020-10-15 11:12:17 - INFO CommandLogger - 2020-10-15 11:12:17,303 Stage-1 map = 100%, reduce = 76%, Cumulative CPU 6700.76 sec
2020-10-15 11:12:58 - INFO CommandLogger - 2020-10-15 11:12:58,476 Stage-1 map = 100%, reduce = 77%, Cumulative CPU 6859.71 sec
2020-10-15 11:13:59 - INFO CommandLogger - 2020-10-15 11:13:59,170 Stage-1 map = 100%, reduce = 77%, Cumulative CPU 7070.44 sec
2020-10-15 11:14:01 - INFO CommandLogger - 2020-10-15 11:14:01,220 Stage-1 map = 100%, reduce = 78%, Cumulative CPU 7078.4 sec
2020-10-15 11:15:01 - INFO CommandLogger - 2020-10-15 11:15:01,725 Stage-1 map = 100%, reduce = 78%, Cumulative CPU 7288.16 sec
2020-10-15 11:15:41 - INFO CommandLogger - 2020-10-15 11:15:41,770 Stage-1 map = 100%, reduce = 79%, Cumulative CPU 7440.69 sec
2020-10-15 11:16:42 - INFO CommandLogger - 2020-10-15 11:16:42,298 Stage-1 map = 100%, reduce = 79%, Cumulative CPU 7664.06 sec
2020-10-15 11:17:43 - INFO CommandLogger - 2020-10-15 11:17:43,260 Stage-1 map = 100%, reduce = 79%, Cumulative CPU 7868.57 sec
2020-10-15 11:17:52 - INFO CommandLogger - 2020-10-15 11:17:52,542 Stage-1 map = 100%, reduce = 80%, Cumulative CPU 7892.0 sec
2020-10-15 11:18:53 - INFO CommandLogger - 2020-10-15 11:18:53,505 Stage-1 map = 100%, reduce = 80%, Cumulative CPU 8099.71 sec
2020-10-15 11:19:54 - INFO CommandLogger - 2020-10-15 11:19:54,142 Stage-1 map = 100%, reduce = 80%, Cumulative CPU 8307.22 sec
2020-10-15 11:20:06 - INFO CommandLogger - 2020-10-15 11:20:06,605 Stage-1 map = 100%, reduce = 81%, Cumulative CPU 8358.3 sec
2020-10-15 11:21:07 - INFO CommandLogger - 2020-10-15 11:21:07,159 Stage-1 map = 100%, reduce = 81%, Cumulative CPU 8575.22 sec
2020-10-15 11:21:25 - INFO CommandLogger - 2020-10-15 11:21:25,652 Stage-1 map = 100%, reduce = 82%, Cumulative CPU 8637.31 sec
2020-10-15 11:22:26 - INFO CommandLogger - 2020-10-15 11:22:26,363 Stage-1 map = 100%, reduce = 82%, Cumulative CPU 8848.1 sec
2020-10-15 11:23:26 - INFO CommandLogger - 2020-10-15 11:23:26,997 Stage-1 map = 100%, reduce = 82%, Cumulative CPU 9038.11 sec
2020-10-15 11:23:54 - INFO CommandLogger - 2020-10-15 11:23:54,660 Stage-1 map = 100%, reduce = 83%, Cumulative CPU 9127.46 sec
2020-10-15 11:24:55 - INFO CommandLogger - 2020-10-15 11:24:55,096 Stage-1 map = 100%, reduce = 83%, Cumulative CPU 9321.88 sec
2020-10-15 11:25:20 - INFO CommandLogger - 2020-10-15 11:25:20,755 Stage-1 map = 100%, reduce = 84%, Cumulative CPU 9389.88 sec
2020-10-15 11:26:21 - INFO CommandLogger - 2020-10-15 11:26:21,205 Stage-1 map = 100%, reduce = 84%, Cumulative CPU 9585.18 sec
2020-10-15 11:27:21 - INFO CommandLogger - 2020-10-15 11:27:21,669 Stage-1 map = 100%, reduce = 84%, Cumulative CPU 9793.14 sec
2020-10-15 11:27:40 - INFO CommandLogger - 2020-10-15 11:27:40,412 Stage-1 map = 100%, reduce = 85%, Cumulative CPU 9867.98 sec
2020-10-15 11:28:40 - INFO CommandLogger - 2020-10-15 11:28:40,944 Stage-1 map = 100%, reduce = 85%, Cumulative CPU 10081.2 sec
2020-10-15 11:29:20 - INFO CommandLogger - 2020-10-15 11:29:20,895 Stage-1 map = 100%, reduce = 86%, Cumulative CPU 10215.68 sec
2020-10-15 11:30:21 - INFO CommandLogger - 2020-10-15 11:30:21,648 Stage-1 map = 100%, reduce = 86%, Cumulative CPU 10429.4 sec
2020-10-15 11:31:15 - INFO CommandLogger - 2020-10-15 11:31:15,194 Stage-1 map = 100%, reduce = 87%, Cumulative CPU 10623.16 sec
2020-10-15 11:32:15 - INFO CommandLogger - 2020-10-15 11:32:15,694 Stage-1 map = 100%, reduce = 87%, Cumulative CPU 10842.0 sec
2020-10-15 11:32:59 - INFO CommandLogger - 2020-10-15 11:32:59,905 Stage-1 map = 100%, reduce = 88%, Cumulative CPU 11015.27 sec
2020-10-15 11:34:00 - INFO CommandLogger - 2020-10-15 11:34:00,676 Stage-1 map = 100%, reduce = 88%, Cumulative CPU 11235.16 sec
2020-10-15 11:34:36 - INFO CommandLogger - 2020-10-15 11:34:36,661 Stage-1 map = 100%, reduce = 89%, Cumulative CPU 11347.48 sec
2020-10-15 11:35:37 - INFO CommandLogger - 2020-10-15 11:35:37,327 Stage-1 map = 100%, reduce = 89%, Cumulative CPU 11546.86 sec
2020-10-15 11:36:04 - INFO CommandLogger - 2020-10-15 11:36:04,108 Stage-1 map = 100%, reduce = 90%, Cumulative CPU 11639.91 sec
2020-10-15 11:37:04 - INFO CommandLogger - 2020-10-15 11:37:04,747 Stage-1 map = 100%, reduce = 90%, Cumulative CPU 11852.84 sec
2020-10-15 11:37:51 - INFO CommandLogger - 2020-10-15 11:37:51,860 Stage-1 map = 100%, reduce = 91%, Cumulative CPU 11990.68 sec
2020-10-15 11:38:52 - INFO CommandLogger - 2020-10-15 11:38:52,275 Stage-1 map = 100%, reduce = 91%, Cumulative CPU 12189.24 sec
2020-10-15 11:39:22 - INFO CommandLogger - 2020-10-15 11:39:22,995 Stage-1 map = 100%, reduce = 92%, Cumulative CPU 12302.73 sec
2020-10-15 11:40:23 - INFO CommandLogger - 2020-10-15 11:40:23,508 Stage-1 map = 100%, reduce = 92%, Cumulative CPU 12497.38 sec
2020-10-15 11:40:36 - INFO CommandLogger - 2020-10-15 11:40:36,931 Stage-1 map = 100%, reduce = 93%, Cumulative CPU 12554.85 sec
2020-10-15 11:41:37 - INFO CommandLogger - 2020-10-15 11:41:37,492 Stage-1 map = 100%, reduce = 93%, Cumulative CPU 12750.91 sec
2020-10-15 11:41:51 - INFO CommandLogger - 2020-10-15 11:41:51,831 Stage-1 map = 100%, reduce = 94%, Cumulative CPU 12796.6 sec
2020-10-15 11:42:40 - INFO CommandLogger - 2020-10-15 11:42:40,927 Stage-1 map = 100%, reduce = 95%, Cumulative CPU 12969.88 sec
2020-10-15 11:43:39 - INFO CommandLogger - 2020-10-15 11:43:39,777 Stage-1 map = 100%, reduce = 96%, Cumulative CPU 13162.3 sec
2020-10-15 11:44:17 - INFO CommandLogger - 2020-10-15 11:44:17,876 Stage-1 map = 100%, reduce = 97%, Cumulative CPU 13292.29 sec
2020-10-15 11:44:55 - INFO CommandLogger - 2020-10-15 11:44:55,866 Stage-1 map = 100%, reduce = 98%, Cumulative CPU 13421.68 sec
2020-10-15 11:45:56 - INFO CommandLogger - 2020-10-15 11:45:56,621 Stage-1 map = 100%, reduce = 98%, Cumulative CPU 13534.96 sec
2020-10-15 11:46:13 - INFO CommandLogger - 2020-10-15 11:46:13,087 Stage-1 map = 100%, reduce = 99%, Cumulative CPU 13563.99 sec
2020-10-15 11:47:13 - INFO CommandLogger - 2020-10-15 11:47:13,719 Stage-1 map = 100%, reduce = 99%, Cumulative CPU 13664.41 sec
2020-10-15 11:47:59 - INFO CommandLogger - 2020-10-15 11:47:59,880 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 13737.04 sec
2020-10-15 11:48:01 - INFO CommandLogger - MapReduce Total cumulative CPU time: 0 days 3 hours 48 minutes 57 seconds 40 msec
2020-10-15 11:48:01 - INFO CommandLogger - Ended Job = job_1591776234013_38784236
2020-10-15 11:48:03 - INFO CommandLogger - Stage-4 is filtered out by condition resolver.
2020-10-15 11:48:03 - INFO CommandLogger - Stage-3 is selected by condition resolver.
2020-10-15 11:48:03 - INFO CommandLogger - Stage-5 is filtered out by condition resolver.
2020-10-15 11:48:05 - INFO CommandLogger - Launching Job 3 out of 3
2020-10-15 11:48:05 - INFO CommandLogger - Number of reduce tasks is set to 0 since there's no reduce operator
2020-10-15 11:48:08 - INFO CommandLogger - Starting Job = job_1591776234013_38803470, Tracking URL = http://wq-yarn1-rm2.58dns.org:9088/proxy/application_1591776234013_38803470/
2020-10-15 11:48:08 - INFO CommandLogger - Kill Command = /usr/lib/software/hadoop/bin/hadoop job -kill job_1591776234013_38803470
2020-10-15 11:48:31 - INFO CommandLogger - Hadoop job information for Stage-3: number of mappers: 5; number of reducers: 0
2020-10-15 11:48:31 - INFO CommandLogger - 2020-10-15 11:48:31,985 Stage-3 map = 0%, reduce = 0%
2020-10-15 11:49:32 - INFO CommandLogger - 2020-10-15 11:49:32,219 Stage-3 map = 0%, reduce = 0%, Cumulative CPU 258.89 sec
2020-10-15 11:49:45 - INFO CommandLogger - 2020-10-15 11:49:45,678 Stage-3 map = 2%, reduce = 0%, Cumulative CPU 320.74 sec
2020-10-15 11:50:46 - INFO CommandLogger - 2020-10-15 11:50:46,676 Stage-3 map = 2%, reduce = 0%, Cumulative CPU 627.4 sec
2020-10-15 11:51:26 - INFO CommandLogger - 2020-10-15 11:51:26,018 Stage-3 map = 9%, reduce = 0%, Cumulative CPU 811.35 sec
2020-10-15 11:51:30 - INFO CommandLogger - 2020-10-15 11:51:30,134 Stage-3 map = 29%, reduce = 0%, Cumulative CPU 831.16 sec
2020-10-15 11:51:31 - INFO CommandLogger - 2020-10-15 11:51:31,159 Stage-3 map = 36%, reduce = 0%, Cumulative CPU 834.16 sec
2020-10-15 11:51:33 - INFO CommandLogger - 2020-10-15 11:51:33,240 Stage-3 map = 43%, reduce = 0%, Cumulative CPU 842.08 sec
2020-10-15 11:52:33 - INFO CommandLogger - 2020-10-15 11:52:33,339 Stage-3 map = 43%, reduce = 0%, Cumulative CPU 1055.24 sec
2020-10-15 11:52:38 - INFO CommandLogger - 2020-10-15 11:52:38,547 Stage-3 map = 48%, reduce = 0%, Cumulative CPU 1072.15 sec
2020-10-15 11:53:38 - INFO CommandLogger - 2020-10-15 11:53:38,621 Stage-3 map = 48%, reduce = 0%, Cumulative CPU 1301.1 sec
2020-10-15 11:54:02 - INFO CommandLogger - 2020-10-15 11:54:02,531 Stage-3 map = 54%, reduce = 0%, Cumulative CPU 1385.39 sec
2020-10-15 11:54:19 - INFO CommandLogger - 2020-10-15 11:54:19,058 Stage-3 map = 61%, reduce = 0%, Cumulative CPU 1423.86 sec
2020-10-15 11:54:40 - INFO CommandLogger - 2020-10-15 11:54:40,723 Stage-3 map = 67%, reduce = 0%, Cumulative CPU 1527.8 sec
2020-10-15 11:55:34 - INFO CommandLogger - 2020-10-15 11:55:34,333 Stage-3 map = 73%, reduce = 0%, Cumulative CPU 1726.6 sec
2020-10-15 11:56:34 - INFO CommandLogger - 2020-10-15 11:56:34,717 Stage-3 map = 73%, reduce = 0%, Cumulative CPU 1944.37 sec
2020-10-15 11:56:35 - INFO CommandLogger - 2020-10-15 11:56:35,744 Stage-3 map = 80%, reduce = 0%, Cumulative CPU 1946.54 sec
2020-10-15 11:57:14 - INFO CommandLogger - 2020-10-15 11:57:14,163 Stage-3 map = 87%, reduce = 0%, Cumulative CPU 2051.26 sec
2020-10-15 11:57:31 - INFO CommandLogger - 2020-10-15 11:57:31,644 Stage-3 map = 94%, reduce = 0%, Cumulative CPU 2085.4 sec
2020-10-15 11:58:32 - INFO CommandLogger - 2020-10-15 11:58:32,303 Stage-3 map = 94%, reduce = 0%, Cumulative CPU 2140.42 sec
2020-10-15 11:58:47 - INFO CommandLogger - 2020-10-15 11:58:47,830 Stage-3 map = 100%, reduce = 0%, Cumulative CPU 2153.37 sec
2020-10-15 11:58:49 - INFO CommandLogger - MapReduce Total cumulative CPU time: 35 minutes 53 seconds 370 msec
2020-10-15 11:58:49 - INFO CommandLogger - Ended Job = job_1591776234013_38803470
2020-10-15 11:58:51 - INFO CommandLogger - Loading data to table dm_qqq_broker_detail_v2_daily partition (cal_dt=2020-10-14)
2020-10-15 11:58:53 - INFO CommandLogger - Partition dm_qqq_broker_detail_v2_daily{cal_dt=2020-10-14} stats: [numFiles=14, numRows=7468075, totalSize=1407456718, rawDataSize=14422227854]
2020-10-15 11:58:54 - INFO CommandLogger - MapReduce Jobs Launched:
2020-10-15 11:58:54 - INFO CommandLogger - Stage-Stage-1: Map: 7 Reduce: 2 Cumulative CPU: 13737.04 sec HDFS Read: 2050308004 HDFS Write: 1407466153 SUCCESS
2020-10-15 11:58:54 - INFO CommandLogger - Stage-Stage-3: Map: 5 Cumulative CPU: 2153.37 sec HDFS Read: 1407795023 HDFS Write: 1407456718 SUCCESS
2020-10-15 11:58:54 - INFO CommandLogger - Total MapReduce CPU Time Spent: 0 days 4 hours 24 minutes 50 seconds 410 msec
2020-10-15 11:58:54 - INFO CommandLogger - OK
2020-10-15 11:58:54 - INFO CommandLogger - Time taken: 4516.979 seconds
2020-10-15 11:58:54 - INFO HiveRunner - run hql(s) over
2020-10-15 11:58:54 - INFO SpringContextUtil - executor global spring context will be closed.
2020-10-15 11:58:54 - job execute finished, exitCode: 0, exitState: 运行成功
2020-10-15 11:58:54 - signal generated: jobState(400), return: (true)
2020-10-15 11:58:54 - End
这是最后一步的日志。从上面的sql可以看到,最后一步的sql只有短短的一个join。join的也是一个小表。但是sql足足运行了一个多小时。这绝对不能忍!!!
所以这里就需要吧这个小表写进内存里面,在不改变sql语句的情况下。只需要加上一个参数
set hive.auto.convert.join=true;
这个参数是开启mapjoin。hive会自动将小表加入内存里面读取。
优化完后运行时间直接从2小时46分降到了2小时。
第二次优化(寻找数据倾斜)
INFO CommandLogger - 2020-10-21 06:42:59,147 Stage-3 map = 100%, reduce = 97%, Cumulative CPU 2712.84 sec
INFO CommandLogger - 2020-10-21 06:43:41,517 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 2778.26 sec
INFO CommandLogger - 2020-10-21 06:44:42,347 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 2878.27 sec
INFO CommandLogger - 2020-10-21 06:45:42,353 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 2969.98 sec
INFO CommandLogger - 2020-10-21 06:46:43,126 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3070.45 sec
INFO CommandLogger - 2020-10-21 06:47:43,817 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3153.5 sec
INFO CommandLogger - 2020-10-21 06:48:44,622 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3227.89 sec
INFO CommandLogger - 2020-10-21 06:49:44,652 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3295.1 sec
INFO CommandLogger - 2020-10-21 06:50:44,820 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3358.19 sec
INFO CommandLogger - 2020-10-21 06:51:44,916 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3419.95 sec
INFO CommandLogger - 2020-10-21 06:52:45,752 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3482.48 sec
INFO CommandLogger - 2020-10-21 06:53:46,528 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3548.78 sec
INFO CommandLogger - 2020-10-21 06:54:47,274 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3616.27 sec
INFO CommandLogger - 2020-10-21 06:55:48,197 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3686.04 sec
INFO CommandLogger - 2020-10-21 06:56:49,222 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3751.53 sec
INFO CommandLogger - 2020-10-21 06:57:49,974 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3832.56 sec
INFO CommandLogger - 2020-10-21 06:58:50,215 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3900.31 sec
INFO CommandLogger - 2020-10-21 06:59:51,068 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 3984.47 sec
INFO CommandLogger - 2020-10-21 07:00:51,944 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4069.52 sec
INFO CommandLogger - 2020-10-21 07:01:52,924 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4154.65 sec
INFO CommandLogger - 2020-10-21 07:02:53,948 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4228.69 sec
INFO CommandLogger - 2020-10-21 07:03:54,043 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4298.79 sec
INFO CommandLogger - 2020-10-21 07:04:54,779 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4377.97 sec
INFO CommandLogger - 2020-10-21 07:05:55,540 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4456.26 sec
INFO CommandLogger - 2020-10-21 07:06:56,310 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4533.57 sec
INFO CommandLogger - 2020-10-21 07:07:56,950 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4612.82 sec
INFO CommandLogger - 2020-10-21 07:08:57,617 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4695.32 sec
INFO CommandLogger - 2020-10-21 07:09:58,284 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4777.87 sec
INFO CommandLogger - 2020-10-21 07:10:58,978 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4861.28 sec
INFO CommandLogger - 2020-10-21 07:11:59,590 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 4938.43 sec
INFO CommandLogger - 2020-10-21 07:13:00,297 Stage-3 map = 100%, reduce = 98%, Cumulative CPU 5008.75 sec
INFO CommandLogger - 2020-10-21 07:13:12,645 Stage-3 map = 100%, reduce = 99%, Cumulative CPU 5020.57 sec
继续去看日志,很容易就能看到这么一段,这一看就是数据倾斜了。
所以继续去yarn的历史日志去确认是否数据倾斜。
从上图就可以很明显看到。有一个reduce倾斜了。
所以现在就要找到是哪个语句的reduce倾斜了。
在这个日志的最上方开始处有这么一条信息
CommandLogger - Hadoop job information for Stage-3: number of mappers: 30; number of reducers: 4
很容易可以看到。是stage3的时候,数据倾斜。
所以,只要找到stage是哪个语句就能找到数据倾斜的地方。
通过explain。将sql输出语法树。
这个语法树会非常的长。但是只要找到stage-3是哪个语句就好了。
语法树的日志就不详细贴出。
通过文本搜索’Stage-3’。
在语法树的下面能看到 alias:e 的标签。
所以,上面的sql语句是e表的join出现了数据倾斜。
回顾join e表的语句。
left outer join dm_bbb_broker_detail_normal_daily e on a.user_id=e.user_id and e.cal_dt = ${dealDate} and coalesce(e.user_id,0)>0
这个语句的数据倾斜一共就两个地方:1.主表的user_id倾斜了。2.从表的user_id倾斜了。
通过查找。可以发现是主表的user_id倾斜了。里面user_id为0的条数有几十万条。
单找到了数据倾斜的地方。优化就变得简单了。
现在直接将主表的user_id打散就好了。
left outer join dm_bbb_broker_detail_normal_daily e on (case when a.user_id = 0 then cast(ceiling(rand() * -65535) as bigint) else a.user_id end)=e.user_id and e.cal_dt = ${dealDate} and coalesce(e.user_id,0)>0
这里用了一个随机数,将user_id为0的字段全部分散。
这里的优化效果很明显。
原来2小时的运行时间。现在一小时就能结束了。
第三次优化(调整map数量与reduce数量)
set mapred.max.split.size=134217728;
set mapred.min.split.size.per.node=134217728;
set mapred.min.split.size.per.rack=134217728;
set hive.exec.reducers.bytes.per.reducer=1073741824;
从上面的参数可以看到。这里sql设置的map切分是128兆。随便挑一个任务去yarn看一下详细情况。
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-XSB8rdx5-1603819129984)(https://i.loli.net/2020/10/26/RL28tnwjkT47yNH.png)]
能看到起了100多个map。
但是每一个map的时间都很短
这里就会造成了yarn会启动大量的map,造成了资源浪费。所以减少map数量迫在眉睫。
经过调试
set mapred.max.split.size=1024000000;
set mapred.min.split.size.per.node=1024000000;
set mapred.min.split.size.per.rack=1024000000;
set mapred.reduce.tasks=150;
将map切分数量过大了近10倍,能明显减少map数量
这里需要经过多次的调试,确定一个最优的值。
最后将直接缩短到了50分钟
最后的sql
set hive.exec.parallel=true;
set mapred.max.split.size=1024000000;
set mapred.min.split.size.per.node=1024000000;
set mapred.min.split.size.per.rack=1024000000;
set mapred.reduce.tasks=150;
set hive.exec.compress.output=true;
set hive.text.output.compress.split=true;
drop table if exists wangzy_broker_detail_daily_tmp01;
create table wangzy_broker_detail_daily_tmp01 as
select
a.broker_id,
b.broker_id,
c.broker_id,
d.broker_id
e.user_id,
f.broker_id,
g.broker_id,
h.broker_id,
i.broker_id
from dm_qqq_broker_detail_basis_daily a
left outer join dm_qqq_broker_detail_combo_daily b on a.broker_id=b.broker_id and b.cal_dt = ${dealDate} and b.broker_id>0
left outer join dm_qqq_broker_detail_account_daily c on a.broker_id=c.broker_id and c.cal_dt = ${dealDate} and c.broker_id>0
left outer join dm_aaa_broker_detail_normal_daily d on a.broker_id=d.broker_id and d.cal_dt = ${dealDate} and d.broker_id>0
left outer join dm_bbb_broker_detail_normal_daily e on (case when a.user_id = 0 then cast(ceiling(rand() * -65535) as bigint) else a.user_id end)=e.user_id and e.cal_dt = ${dealDate} and coalesce(e.user_id,0)>0
left outer join dm_qqq_broker_detail_link_daily f on a.broker_id=f.broker_id and f.cal_dt = ${dealDate} and f.broker_id>0
left outer join (
select
user_id as broker_id
from(
select
user_id
from dw_new_prop_base_esf
where cal_dt=${dealDate} and state in(1,2) and substr(add_date,1,10)=${dealDate} and COALESCE(user_id,0)>0
union ALL
select
user_id
from dw_new_prop_base_zf
where cal_dt=${dealDate} and state in(1,2) and substr(add_date,1,10)=${dealDate} and COALESCE(user_id,0)>0
union ALL
select
user_id
from dw_new_prop_base_sydc
where cal_dt=${dealDate} and state in(1,2) and substr(add_date,1,10)=${dealDate} and COALESCE(user_id,0)>0
)aa group by user_id
) g on a.broker_id=g.broker_id
left outer join (
select
broker_id
from dm_aaa_user_coupon_daily
where cal_dt=${dealDate}
group by broker_id
) h on a.broker_id=h.broker_id and h.broker_id>0
left outer join (
select
broker_id
from dm_house_daily
where cal_dt=${dealDate}
group by broker_id
) i on a.broker_id=i.broker_id and i.broker_id>0
left outer join da_qqq_broker_quality_daily j on a.broker_id = j.broker_id and j.cal_dt = ${dealDate}
where a.cal_dt = ${dealDate};
use hdp_anjuke_dw_db;
alter table dm_qqq_broker_detail_v2_daily drop partition(cal_dt = ${dealDate});
set hive.auto.convert.join=true;
insert overwrite table dm_broker_detail_daily partition(cal_dt = ${dealDate})
select
a.*
from hdp_anjuke_dw_stage.wangzy_broker_detail_daily_tmp01 a
join dw_qqq_account_city_config_daily cfg on a.city_id=cfg.city_id_ajk and cfg.cal_dt=${dealDate}