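-- Joining two large HBase tables in one query causes both performance and correctness problems (pagination in particular).
-- Data from different sources should be merged before it is loaded into HBase, so that the large data set lives in a single table.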
-- Inner join of T_EXTENSION_SHOW and T_EXTENSION_LOGIN for a single day,
-- matched on date and email.
-- Joining the two 2M+-row tables with a filter on the show side only failed
-- with an out-of-memory error. The join condition already forces
-- t2.login_date = t1.show_date = '2018-11-25', so the explicit predicate on
-- t2.login_date below returns exactly the same rows while allowing the login
-- side to be filtered before the join is built.
-- NOTE(review): if this is Apache Phoenix and memory is still insufficient,
-- the /*+ USE_SORT_MERGE_JOIN */ hint is the documented alternative to the
-- default hash join -- confirm against the Phoenix version in use.
SELECT
    t1.seq_id,
    t2.country
FROM T_EXTENSION_SHOW t1
INNER JOIN T_EXTENSION_LOGIN t2
    ON t1.show_date = t2.login_date
    AND t1.email = t2.email
WHERE t1.show_date = '2018-11-25'
    AND t2.login_date = '2018-11-25';
-- Two equivalent statements with roughly the same performance follow.
-- Which one would you choose?
-- This variant lets the derived table expose every column via SELECT *.
-- The inner projection must expose seq_id and country, because the outer
-- query filters on t1.country and returns/sorts by t1.seq_id; a constant
-- projection (SELECT 1) exposes neither column, so those references could
-- not be resolved.
SELECT t1.seq_id AS seq_id
FROM (
    SELECT *
    FROM T_EXTENSION_ALL_DATAS
    WHERE login_date = '2018-11-24'
        AND email = 'wjc@45555.com'
) t1
WHERE t1.country = 'France'
ORDER BY t1.seq_id DESC
LIMIT 10;
-- Recommended form: the statement is clear and easy to read.
-- The derived table narrows T_EXTENSION_ALL_DATAS to one login date and one
-- email and lists only the two columns the outer query uses; the outer query
-- then keeps the 'France' rows and returns the 10 largest seq_id values.
SELECT d.seq_id AS seq_id
FROM (
    SELECT
        seq_id,
        country
    FROM T_EXTENSION_ALL_DATAS
    WHERE email = 'wjc@45555.com'
        AND login_date = '2018-11-24'
) d
WHERE d.country = 'France'
ORDER BY d.seq_id DESC
LIMIT 10;
-- EXISTS example: return every T_EXTENSION_SHOW row for which a
-- T_EXTENSION_LOGIN row with the same date and email exists, restricted to
-- login_date '2018-11-24' and email 'wjc@1.com'.
SELECT shown.*
FROM T_EXTENSION_SHOW shown
WHERE EXISTS (
    SELECT 1
    FROM T_EXTENSION_LOGIN logged
    WHERE logged.email = 'wjc@1.com'
        AND logged.login_date = '2018-11-24'
        -- correlation with the outer row
        AND logged.login_date = shown.show_date
        AND logged.email = shown.email
);
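-- Execution plan (excerpt below): with this plan the query is fast when rows with COUNTRY='China' exist.
-- With COUNTRY='China11' no row matches and the query is slow -- presumably because the server-side filter then has to scan every row before returning nothing.
-- SERVER FILTER BY FIRST KEY ONLY AND COUNTRY='China'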