测试Hbase 表映射成 Hive表查询效率

最新推荐文章于 2024-07-23 10:46:14 发布

在屋顶听歌

最新推荐文章于 2024-07-23 10:46:14 发布

阅读量4k

点赞数 1

分类专栏： Hive Hbase

本文链接：https://blog.csdn.net/u013850277/article/details/78472568

版权

Hive 同时被 2 个专栏收录

14 篇文章 0 订阅

订阅专栏

Hbase

14 篇文章 0 订阅

订阅专栏

一、准备工作：

1、编写程序将1000万条数据写到Hbase表中；
2、将对应的Hbase表映射成Hive表。
在Hive 的shell中执行类似如下的命令

hive> CREATE EXTERNAL TABLE 
IF NOT EXISTS t_hbase_person_his10(id string, NAME String, salary string,START_DATE string,END_DATE string) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'  
WITH SERDEPROPERTIES ('hbase.columns.mapping' = ':key,info:id,info:name,info:salary,info:start_date,info:end_date') 
 TBLPROPERTIES ('hbase.table.name' ='t_hbase_person_his10');

3、复制一份一样的数据到Hive表，这份Hive数据是实际存在Hive中的。通过类似Sql复制

create table t_person_his10 as select * from t_hbase_person_his10;

二、通过Hive Jdbc 方式对比其查询时间，对比结果如下所示

其中t_hbase_person_his10 表为Hbase关联生成的Hive表，t_person_history10为实际的Hive表，来源通过create table t_person_his10 as select * from t_hbase_person_his10;

1、查当前数据，默认返回30条

sql = "select * from t_hbase_person_his10 where end_date='9999-12-31' limit 30";// use statTime:353ms 

sql = "select * from t_person_history10 where end_date='9999-12-31' limit 30";//use statTime:119ms

2、查指定日期数据，默认返回30条

sql = "select * from t_hbase_person_his10 where start_date<='2017-09-18' and end_date>='2017-09-18' and salary>990000 limit 30";//use statTime:411ms

sql = "select * from t_hbase_person_his10 where start_date<='2017-09-20' and end_date>='2017-09-20' and salary>990000 limit 30";//use statTime:908ms

sql = "select * from t_person_history10 where start_date<='2017-09-18' and end_date>='2017-09-18' and salary>990000 limit 30";//use statTime:147ms
sql = "select * from t_person_history10 where start_date<='2017-09-20' and end_date>='2017-09-20' and salary>990000 limit 30";//use statTime:266ms

3、order by效率很低

sql = "select * from t_hbase_person_his10 where start_date<='2017-09-20' and end_date>='2017-09-20' and salary>990000  order by salary limit 30";// use statTime:95000ms 

sql = "select * from t_person_history10 where start_date<='2017-09-20' and end_date>='2017-09-20' and salary>990000 order by salary limit 30";//use statTime:35836ms

4、between and

sql = "select * from t_hbase_person_his10 where end_date='9999-12-31' and salary between 500000 and 600000 limit 30";//use statTime:338ms

sql = "select * from t_person_history10 where end_date='9999-12-31' and salary between 500000 and 600000 limit 30";//use statTime:166ms

5、对指定用户进行溯源，这里以用户名为唯一标识，效率极低，（可以考虑用rowkey做为唯一标识）

sql = "SELECT mobile,start_date FROM t_hbase_person_his10 where name='hehe98'";//use statTime:86701ms 13901173602,2017-09-04 13201382515,2017-09-07 15107963040,2017-09-11
sql = "SELECT mobile,start_date FROM t_hbase_person_his10 where rowkey='1298'";//use statTime:316ms 

sql = "SELECT mobile,start_date FROM t_person_history10 where name='hehe98'";//use statTime:6326ms 13901173602,2017-09-04 13201382515,2017-09-07 15107963040,2017-09-11
sql = "SELECT mobile,start_date FROM t_person_history10 where rowkey='1298'";//use statTime:6288ms

6、group by

sql = "select start_date,count(1) from t_hbase_person_his10 group by start_date";//use statTime:100330ms

sql = "select start_date,count(1) from t_person_history10 group by start_date";//use statTime:25857ms

7、模糊查询

sql = "select * from t_hbase_person_his10 where name like '%hehe111%' limit 30";// use statTime:2738ms
sql = "select * from t_hbase_person_his10 where name like '%hehe111%'  and start_date>'2017-09-18' limit 10";// use statTime:2745ms
sql = "select * from t_hbase_person_his10 where rowkey like '%10059%'  and start_date>'2017-09-18' limit 10";// use statTime:665ms

sql = "select * from t_person_history10 where name like '%hehe111%'  and start_date>'2017-09-18' limit 10";// use statTime:257ms
sql = "select * from t_person_history10 where rowkey like '%10059%'  and start_date>'2017-09-18' limit 10";// use statTime:135ms
sql = "select * from t_person_history10 where name like '%hehe111%' limit 30";// use statTime:225ms

8、查询指定rowkey

sql = "select * from t_hbase_person_his10 where rowkey='11123'";//use statTime:342ms

sql = "select * from t_person_history10 where rowkey='11123'";//use statTime:8386ms

9、对Hive表进行关联查询

sql = "select th.mobile,th.start_date,tb.mobile from t_person_history10 th, t_hbase_person_his10 tb where th.name=tb.name limit 10";//use statTime:88614ms

sql = "select th.mobile,th.start_date,tb.mobile from t_person_history10 th left outer join t_hbase_person_his10 tb on  th.name=tb.name limit 10";//use statTime:88614ms