用户画像之线上订单

#!/bin/sh

#支付偏好
paySql="
use tmp;
drop table if exists member_pay_prefer;
create table member_pay_prefer as
select * from
    (
        select a.*,row_number() over(partition by a.user_id order by a.num desc) as rn
        from 
        (
            select s.user_id,
            (case  
                 when p.payment_channel=1100 then '悠点卡支付'
                 when p.payment_channel=1001 then '伊点卡支付'
                 when p.payment_channel=11 or p.payment_channel=18 then '支付宝支付'
                 when p.payment_channel=8 or p.payment_channel=9 then '微信支付'
                 when p.payment_channel=1002 then '积分支付'
                 when p.payment_channel=1003 then '伊豆支付'
                 when p.payment_channel=1000 then '佣金支付'
                 else '其他'
                 end
            ) payment_channel,count(1) num from sourcedata.so_orderpay_fllow p left join sourcedata.so s
            on p.order_code=s.order_code where s.user_id is not null 
            group by s.user_id,payment_channel order by s.user_id desc
        ) a 
    ) t where rn<=3
"
hive -e "$paySql"

#商品偏好
productSql="
use tmp;
drop table if exists member_product_prefer;
create table member_product_prefer as 
select * from (
select a.*,row_number() over(partition by a.user_id order by a.num desc) as rn from 
(select user_id,third_merchant_product_code,sum(product_item_num) num  from sourcedata.so_item group by user_id,third_merchant_product_code) a )t where rn<=10
"
hive -e "$productSql"

#品类偏好
categorySql="
use tmp;
drop table if exists member_category_prefer;
create table member_category_prefer as
select * from (
select a.*,row_number() over(partition by a.user_id order by a.num desc) as rn from 
(select user_id,category_id,count(1) num from sourcedata.so_item group by user_id,category_id)a )t where rn<=5
"
hive -e "$categorySql"

#下单时间段偏好
timePreferSql="
use tmp;
drop table if exists member_time_prefer;
create table member_time_prefer as
select t.user_id,cast(t.am_buy_prefer as int) as am_prefer,cast(t.mid_buy_prefer as int) as mid_prefer ,
cast(t.pm_buy_prefer as int) as pm_prefer,cast(t.night_buy_prefer as int) as night_prefer,cast(t.tonight_buy_prefer as int) as tonight_prefer,
cast(t.lingchen_buy_prefer as int) as lc_prefer from
(select user_id,
sum(case when hour(create_time) >= 6 and hour(create_time)<10 then 1 else 0 end) am_buy_prefer,
sum(case when hour(create_time) >= 10 and hour(create_time)<14 then 1 else 0 end) mid_buy_prefer,
sum(case when hour(create_time) >= 14 and hour(create_time)<18 then 1 else 0 end) pm_buy_prefer,
sum(case when hour(create_time) >= 18 and hour(create_time)<22 then 1 else 0 end) night_buy_prefer ,
sum(case when (hour(create_time) >= 22 and hour(create_time)<=23) or (hour(create_time)>=0 and hour(create_time) <2) then 1 else 0 end) tonight_buy_prefer ,
sum(case when hour(create_time) >= 2 and hour(create_time)<6 then 1 else 0 end) lingchen_buy_prefer
from sourcedata.so group by user_id)t
"
#hive -e "$timePreferSql"
####线上订单
hiveSql="
select a.user_id,a.first_order_time,s.good_receiver_city as last_receive_city,(case when b.receive_province_num>1 then 1 else 0 end) is_multi_province,
a.last_order_time,a.last_buy_days,a.paynum,a.paytotalmoney,round((a.paytotalmoney/a.paynum),2) as avg_money,
a.lottery_paynum,a.group_paynum,a.onem_paynum,a.threem_paynum,a.sixm_paynum,a.max_paymoney,a.min_paymoney,a.cancel_num,
(case when (a.use_coupon_num/a.paynum)>0.5 then 1 else 0 end) is_prefer_coupon,
a.receive_address_num,round((c.skunum/a.paynum),0) as avg_skunum,nvl(d.refund_num,0)refund_num,nvl(e.bad_comment_num,0)bad_comment_num,
f.online_pay_prefer,g.online_category_prefer,h.online_product_prefer,
timeprefer(t.am_prefer,t.mid_prefer,t.pm_prefer,t.night_prefer,t.tonight_prefer,t.lc_prefer) as online_buytime_prefer,
'' as online_receive_address_type,'' as online_group,'' as online_fail_group
from
(select user_id,min(case when order_status !=34 and order_payment_status=3 then order_payment_confirm_date end) first_order_time,
max(case when order_status !=34 and order_payment_status=3 then order_payment_confirm_date end) last_order_time,
datediff(current_timestamp, max(case when order_status !=34 and order_payment_status=3 then order_payment_confirm_date end)) last_buy_days,
sum(case when order_status !=34 and order_payment_status=3 and order_promotion_type!=6 then 1 else 0 end) paynum,
sum(case when order_status !=34 and order_payment_status=3 and order_promotion_type!=6 then (order_amount+order_delivery_fee) else 0 end) paytotalmoney,
sum(case when order_status !=34 and order_payment_status=3 and order_promotion_type=6 then 1 else 0 end) lottery_paynum,
sum(case when order_status !=34 and order_payment_status=3 and order_promotion_type=1 then 1 else 0 end) group_paynum,
sum(case when order_status !=34 and order_payment_status=3 and order_promotion_type!=6 
    and order_payment_confirm_date > date_sub(current_timestamp,30) then 1 else 0 end) onem_paynum,
sum(case when order_status !=34 and order_payment_status=3 and order_promotion_type!=6 
    and order_payment_confirm_date > date_sub(current_timestamp,90) then 1 else 0 end) threem_paynum,
sum(case when order_status !=34 and order_payment_status=3 and order_promotion_type!=6 
    and order_payment_confirm_date > date_sub(current_timestamp,180) then 1 else 0 end) sixm_paynum,
max(case when order_status !=34 and order_payment_status=3 and order_promotion_type!=6 then (order_amount+order_delivery_fee) end) max_paymoney,
min(case when order_status !=34 and order_payment_status=3 and order_promotion_type!=6 then (order_amount+order_delivery_fee) end) min_paymoney,
sum(case when order_status=34 then 1 else 0 end) cancel_num,
sum(case when order_status !=34 and order_payment_status=3 and order_promotion_type!=6 and order_paid_by_coupon >0 then 1 else 0 end) use_coupon_num,
count(distinct good_receiver_address) receive_address_num
from sourcedata.so where is_leaf=1  group by user_id ) a
left join 
sourcedata.so s on a.user_id=s.user_id and a.last_order_time=s.order_payment_confirm_date
left join
(select user_id,count(distinct good_receiver_province) receive_province_num from sourcedata.so group by user_id) b on a.user_id=b.user_id
left join 
(select sum(sku_num) skunum,user_id from(
select user_id,order_code,count(distinct third_merchant_product_code) sku_num from 
(select b.user_id,b.order_code,b.third_merchant_product_code from sourcedata.so a left join sourcedata.so_item b 
on a.order_code=b.order_code and a.order_status !=34 and a.order_payment_status=3 and a.order_promotion_type!=6)t
 group by user_id,order_code) t group by user_id) c on a.user_id=c.user_id
 left join
 (select user_id,count(distinct order_code) refund_num from sourcedata.so_return where refund_time is null group by user_id) d on a.user_id=d.user_id
 left join
 (select user_id,count(1) bad_comment_num from sourcedata.sns_merchant_product_comment where rate_flag=3 group by user_id) e on a.user_id=e.user_id
 left join
 (select user_id,concat_ws(',', collect_set(payment_channel)) online_pay_prefer from tmp.member_pay_prefer group by user_id)f on a.user_id=f.user_id
 left join
 (select user_id,concat_ws(',', collect_set(cast(category_id as string))) online_category_prefer from tmp.member_category_prefer group by user_id)g on a.user_id=g.user_id
 left join
  (select user_id,concat_ws(',', collect_set(third_merchant_product_code)) online_product_prefer from tmp.member_product_prefer group by user_id)h on a.user_id=h.user_id
 left join
  (select user_id,am_prefer,mid_prefer,pm_prefer,night_prefer,tonight_prefer,lc_prefer from tmp.member_time_prefer) t on a.user_id=t.user_id
";

#下单时间段,最后写个udf进行排序,暂不做

dropSql="drop table if exists member_online_order_info"

hive -e "
use tmp;
drop table if exists member_online_order;
set hive.map.aggr=true;
set hive.groupby.skewindata=true;
add jar /usr/local/azkaban/sqoop_job/personas/timeprefer.jar;
CREATE TEMPORARY FUNCTION timeprefer AS 'com.laiyifen.hiveudf.MemberOrderTimePrefer';
create table member_online_order as 
$hiveSql
"

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值