使用 bulk load 批量导入数据到 HBase
操作步骤
HBase Version: 2.2.7
Hive Version: 3.1.0
-
在 hive shell 里添加 HBase jar 包
-- Register the Hive HBase storage handler and the HBase jars it needs
-- for the current Hive session (one ADD JAR statement per line).
ADD JAR /opt/apache/hive/lib/hive-hbase-handler-3.1.0.jar;
ADD JAR /opt/apache/hive/lib/hbase-client-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-common-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-common-2.0.0-alpha4-tests.jar;
ADD JAR /opt/apache/hive/lib/hbase-hadoop2-compat-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-hadoop2-compat-2.0.0-alpha4-tests.jar;
ADD JAR /opt/apache/hive/lib/hbase-hadoop-compat-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-http-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-mapreduce-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-metrics-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-metrics-api-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-prefix-tree-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-procedure-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-protocol-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-protocol-shaded-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-replication-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-server-2.0.0-alpha4.jar;
ADD JAR /opt/apache/hive/lib/hbase-shaded-miscellaneous-1.0.1.jar;
ADD JAR /opt/apache/hive/lib/hbase-shaded-netty-1.0.1.jar;
ADD JAR /opt/apache/hive/lib/hbase-shaded-protobuf-1.0.1.jar;
-
创建hive表,导入测试数据
-- Create the source Hive table (ORC) and load three sample rows.
USE zxl_test;

-- IF EXISTS keeps the script re-runnable when the table does not exist yet.
DROP TABLE IF EXISTS hive_table;

CREATE TABLE hive_table (
    key         int,
    name        string,
    age         int,
    create_time string
)
STORED AS ORC;

-- create_time is declared as string, so the timestamp expression is stored
-- in its string form.
INSERT INTO hive_table (key, name, age, create_time)
VALUES
    (1, 'a', 18, from_utc_timestamp(CURRENT_TIMESTAMP, 'GMT+8')),
    (2, 'b', 19, from_utc_timestamp(CURRENT_TIMESTAMP, 'GMT+8')),
    (3, 'c', 20, from_utc_timestamp(CURRENT_TIMESTAMP, 'GMT+8'));
-
创建生成hfile的hive表
-- Staging table whose output format writes HFiles instead of regular Hive files.
-- 'hfile.family.path' is the HDFS directory that receives the HFiles; its last
-- path component (cf) is the column family name used when creating the HBase table.
-- IF EXISTS keeps the script re-runnable when the table does not exist yet.
DROP TABLE IF EXISTS hive_hfile_table;

CREATE TABLE hive_hfile_table (
    key         int,
    name        string,
    age         int,
    create_time string
)
STORED AS
    INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
TBLPROPERTIES ('hfile.family.path' = '/tmp/hive_hfile_table/cf');
-
向 hive_hfile_table 表插入数据
-- Write the rows of hive_table out as HFiles through hive_hfile_table.
-- HFiles must be written in ascending row-key order, so the rows are explicitly
-- clustered (distributed and sorted) by the row key; without this the job only
-- succeeds when the input happens to arrive already sorted.
-- NOTE(review): per the Hive HBase bulk load documentation the first column is
-- the row key and is written in its text form, hence the sort on the string
-- value rather than the numeric value -- confirm against your Hive version.
INSERT OVERWRITE TABLE hive_hfile_table
SELECT
    key,
    name,
    age,
    create_time
FROM hive_table
CLUSTER BY CAST(key AS STRING);
-
查看hfile是否生成
# List the generated HFiles.
hdfs dfs -ls /tmp/hive_hfile_table/cf

# Show the details of one HFile.
# xxx is presumably a placeholder for a file name taken from the listing above.
hbase hfile -v -p -m -f hdfs://bigbigworld/tmp/hive_hfile_table/cf/xxx
-
hbase建表
# Start the HBase shell from the OS command line ...
hbase shell

# ... then, inside the HBase shell, create the target table with a single
# SNAPPY-compressed column family named cf.
create 'hbase_table', { NAME => 'cf', COMPRESSION => 'SNAPPY' }
-
BulkLoad加载hfile到HBase
# Bulk-load the generated HFiles into hbase_table.
# The path argument is the parent directory that contains the column-family
# directory (cf), not the cf directory itself.
# Uses the tool-package class: the mapreduce-package LoadIncrementalHFiles is
# deprecated in HBase 2.x in favour of org.apache.hadoop.hbase.tool.
hbase org.apache.hadoop.hbase.tool.LoadIncrementalHFiles hdfs://bigbigworld/tmp/hive_hfile_table hbase_table
-
验证HBase数据
# Run inside the HBase shell: print every row of hbase_table to verify the load.
scan 'hbase_table'