1、生成hdfs文件
创建hive表,这个表是用来生成适合bulkload的数据,被hbase导入之后该表为空表
-- Hive staging table used to generate HFiles suitable for HBase bulkload.
-- Output is written by HiveHFileOutputFormat under hfile.family.path;
-- once those HFiles are moved into HBase by the bulkload step, the HDFS
-- directory is empty, so querying this table afterwards returns no rows.
-- NOTE: the original used typographic quotes (‘…’), which are a HiveQL
-- syntax error; string literals must use ASCII single quotes.
CREATE TABLE stu_info (
    rowkey string COMMENT 'rowkey',  -- becomes the HBase row key
    name   string COMMENT '姓名'      -- maps to a column in the family below
)
STORED AS
    INPUTFORMAT  'org.apache.hadoop.mapred.TextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
-- The last path segment ('info') is the HBase column family name.
TBLPROPERTIES ('hfile.family.path' = '/user/hive-hbase/info');
hfile.family.path的最后一级目录为hbase的列族,同样的,这种方式限制了hbase列族的个数
添加依赖jar
-- Register the HBase client/server jars and the hive-hbase-handler so that
-- HiveHFileOutputFormat (and its HBase dependencies) are on the session
-- classpath. Paths are local to the HiveServer/CLI host — adjust the
-- versions/paths to match your installation.
add jar /opt/apache-hive-2.3.4-bin/lib/hbase-client-1.1.1.jar;
add jar /opt/apache-hive-2.3.4-bin/lib/hbase-common-1.1.1.jar;
add jar /opt/apache-hive-2.3.4-bin/lib/hbase-server-1.1.1.jar;
add jar /opt/apache-hive-2.3.4-bin/lib/hbase-protocol-1.1.1.jar;
add jar /opt/apache-hive-2.3.4-bin/lib/hive-hbase-handler-2.3.4.jar;
随便添加点数据,就会看到hdfs目录下有文件生成
insert into stu_info values(‘001’,‘zhangsan’);
2、创建hbase表
create ‘stu_info’,‘info’
3、桶导入
# Bulk-load the generated HFiles into the HBase table.
# Fixes vs. the original notes:
#   - "{HBASE_HOME}" lacked the "$" needed for shell variable expansion;
#   - "hbase bulkload" is not a real subcommand — the tool class is
#     org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles, invoked once.
# The path argument is the PARENT directory that contains the per-family
# subdirectory (info/), followed by the target table name.
${HBASE_HOME}/bin/hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles \
    hdfs://node01:9000/user/hive-hbase/ stu_info