hive 实战笔记case2

– case2 –

–========== click_log ==========–

/*
11  ad_101  2014-05-01 06:01:12.334+01
22  ad_102  2014-05-01 07:28:12.342+01
33  ad_103  2014-05-01 07:50:12.33+01
11  ad_104  2014-05-01 09:27:12.33+01
22  ad_103  2014-05-01 09:03:12.324+01
33  ad_102  2014-05-02 19:10:12.343+01
11  ad_101  2014-05-02 09:07:12.344+01
35  ad_105  2014-05-03 11:07:12.339+01
22  ad_104  2014-05-03 12:59:12.743+01
77  ad_103  2014-05-03 18:04:12.355+01
99  ad_102  2014-05-04 00:36:39.713+01
33  ad_101  2014-05-04 19:10:12.343+01
11  ad_101  2014-05-05 09:07:12.344+01
35  ad_102  2014-05-05 11:07:12.339+01
22  ad_103  2014-05-05 12:59:12.743+01
77  ad_104  2014-05-05 18:04:12.355+01
99  ad_105  2014-05-05 20:36:39.713+01
*/
CREATE EXTERNAL TABLE click_log (
    cookie_id  STRING
  , ad_id      STRING
  , ts         STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/tmp/db_case2/click_log';

select cookie_id, collect_set(ad_id) as orders
from click_log
--where ts > '2014-05-02'
group by cookie_id;

select cookie_id, group_concat(ad_id, '|') as orders
from click_log
--where ts > '2014-05-02'
group by cookie_id;

–========== ad_list ==========–

/*
ad_101  http://abcn.net/    catalog8|catalog1
ad_102  http://www.abcn.net/    catalog6|catalog3
ad_103  http://fxlive.de/   catalog7
ad_104  http://fxlive.fr/   catalog5|catalog1|catalog4|catalog9
ad_105  http://fxlive.eu/   
*/
CREATE EXTERNAL TABLE ad_list (
    ad_id    STRING
  , url      STRING
  , catalogs array<STRING>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '|'
LOCATION '/tmp/db_case2/ad_list';

CREATE EXTERNAL TABLE ad_list_string (
   ad_id    STRING
 , url      STRING
 , catalogs STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/tmp/db_case2/ad_list';

select click.cookie_id, click.ad_id, click.amount, ad_list_string.catalogs as orders
from (
  select cookie_id, ad_id, count(1) as amount from click_log group by cookie_id, ad_id
) click
join ad_list_string
on (ad_list_string.ad_id = click.ad_id);


select ad_id, catalog from ad_list LATERAL VIEW OUTER explode(catalogs) t AS catalog;
select ad_id, collect_set(catalog) from ad_list LATERAL VIEW OUTER explode(catalogs) t AS catalog group by ad_id;


select click.cookie_id, ad.catalog from click_log click
left outer join (
  select ad_id, catalog from ad_list LATERAL VIEW OUTER explode(catalogs) t AS catalog
) ad
on (click.ad_id = ad.ad_id);


create table cookie_cats as
select click.cookie_id, ad.catalog, count(1) as weight from click_log click
left outer join (
  select ad_id, catalog from ad_list LATERAL VIEW OUTER explode(catalogs) t AS catalog
) ad
on (click.ad_id = ad.ad_id)
group by click.cookie_id, ad.catalog
order by cookie_id, weight desc;

select cookie_id, collect_set(catalog) from cookie_cats group by cookie_id; -- where catalog is not null
select cookie_id, group_concat(catalog, '|') from cookie_cats group by cookie_id; -- impala group_concat
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值