hive—两例倾斜case优化

6-1:count(Distinct)所产生的倾斜

优化前:588.23 seconds

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

select supplier,

count(distinct a.crypto_customer_number)

from

(

    select crypto_customer_number,service_id

    from sdm.sdm_sdb_crm_pre_call_record_full_d

    where dt=date_sub(current_date(),1)

) a

left join

(--取processid

    select service_id,process_id

    from  sdm.sdm_sdb_crm_predictive_call_process_running_record_full_d

    where dt=date_sub(current_date(),1)

) b

on a.service_id=b.service_id

left join

(

    select id,robot_code

    from  sdm.sdm_sdb_crm_predictive_call_process_config_full_d

    where dt=date_sub(current_date(),1)

    and robot_code <> ''

) c

on b.process_id=c.id

left join

(

    select robot_code,supplier

    from sdm.sdm_sdb_crm_ivr_call_robot_config_full_d

    where dt=date_sub(current_date(),1)

) d

on d.robot_code=c.robot_code

group by supplier

优化后:48.23 seconds

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

select supplier,

count(distinct a.crypto_customer_number)

from

(

    select crypto_customer_number,service_id

    from sdm.sdm_sdb_crm_pre_call_record_full_d

    where dt=date_sub(current_date(),1)

) a

left join

(--取processid

    select service_id,process_id

    from sdm.sdm_sdb_crm_predictive_call_process_running_record_full_d

    where dt=date_sub(current_date(),1)

) b

on a.service_id=b.service_id

left join

(

    select id,robot_code

    from sdm.sdm_sdb_crm_predictive_call_process_config_full_d

    where dt=date_sub(current_date(),1)

    and robot_code <> ''

) c

on b.process_id=c.id

left join

(

    select robot_code,supplier

    from sdm.sdm_sdb_crm_ivr_call_robot_config_full_d

    where dt=date_sub(current_date(),1)

) d

on d.robot_code=c.robot_code

where nvl(supplier,'')<>''

group by supplier

优化前:10min

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

select

  count(distinct a.user_id) `公众号+app用户数`,

  count(distinct b.user_id) `公众号+app&企信用户数`

  from

(

    select cast(user_id as bigint) user_id

    from sdm.sdm_user_follow_status_full_d

    where dt='2021-11-08'

    and third_type in (7,420,443,445)

    and follow_flag=1

    and user_id>0

   

    union all

     

    select  cast(user_id as bigint) user_id

    from sdm.sdm_biz_app_visit_history_full_d

    where dt=date_sub(current_date(),1)

    and user_id>0

    group by cast(user_id as bigint)

   

)a

left join

(

  select outer_sd_user_id as user_id

  from sdm_sdb_crm_wechat_qy_user_outer_relation_full_d

  where dt='2021-11-08'

  and outer_sd_user_id>0

  and is_delete = 0

  and valid = 1

)b on a.user_id=b.user_id

;

优化后:90s

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

select

count(1) `公众号+app用户数`

from (

select

    user_id

    from

    (

        select cast(user_id as bigint) user_id

        from sdm.sdm_user_follow_status_full_d

        where dt='2021-11-08'

        and third_type in (7,420,443,445)

        and follow_flag=1

        and user_id>0

       

        union all

         

        select  cast(user_id as bigint) user_id

        from sdm.sdm_biz_app_visit_history_full_d

        where dt=date_sub(current_date(),1)

        and user_id>0

        group by cast(user_id as bigint)

    ) t

group by

    user_id

) a

;

select

count(1) `公众号+app&企信用户数`

from (

select

  b.user_id

  from

(

    select cast(user_id as bigint) user_id

    from sdm.sdm_user_follow_status_full_d

    where dt='2021-11-08'

    and third_type in (7,420,443,445)

    and follow_flag=1

    and user_id>0

   

    union all

     

    select  cast(user_id as bigint) user_id

    from sdm.sdm_biz_app_visit_history_full_d

    where dt=date_sub(current_date(),1)

    and user_id>0

    group by cast(user_id as bigint)

   

)a

left join

(

  select outer_sd_user_id as user_id

  from sdm.sdm_sdb_crm_wechat_qy_user_outer_relation_full_d

  where dt='2021-11-08'

  and outer_sd_user_id>0

  and is_delete = 0

  and valid = 1

)b on a.user_id=b.user_id

group by

b.user_id

) a

;

6-2:一种关联产生的数据倾斜

优化前:30min以上

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

SELECT

       count(DISTINCT c.user_id),

       c.register_city,

       c.register_province

FROM

  (SELECT from sdm.sdm_user_account_full_d where dt=date_sub(current_date(),1))b

LEFT JOIN

  (SELECT user_id,

          crypto_mobile,

          register_city,

          register_province

   FROM dwb.dwb_user_tag_info_full_d

   WHERE dt=date_sub(current_date(),1)

   and crypto_mobile<>''

   )c ON b.crypto_mobile = c.crypto_mobile

GROUP BY

         c.register_city,

         c.register_province

优化后:147s

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

SELECT

       count(DISTINCT c.user_id),

       c.register_city,

       c.register_province

FROM

  (SELECT from sdm.sdm_user_account_full_d where dt=date_sub(current_date(),1))b

LEFT JOIN

  (SELECT user_id,

          crypto_mobile,

          register_city,

          register_province

   FROM dwb.dwb_user_tag_info_full_d

   WHERE dt=date_sub(current_date(),1)

   and crypto_mobile<>''

   )c ON case when nvl(b.crypto_mobile,'')='' then concat('hive',rand()) else b.crypto_mobile end = c.crypto_mobile

GROUP BY

         c.register_city,

         c.register_province

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值