首先需要启动HDFS、zk、yarn、Hbase。
hbase clean --cleanAll //清除HBase在ZooKeeper和HDFS中的全部数据(若只需清除ZooKeeper中的信息,使用 --cleanZk)
创建表 需要在Hbase中先创建"baizhi:t_employee"
-- Hive external table mapped onto the HBase table "baizhi:t_employee".
-- Mapping order follows the column list: empno is the HBase row key (:key),
-- every other column is stored under column family cf1.
CREATE EXTERNAL TABLE t_employee_hbase (
    empno    INT,
    ename    STRING,
    job      STRING,
    mgr      INT,
    hiredate TIMESTAMP,
    sal      DECIMAL(7,2),
    comm     DECIMAL(7,2),
    deptno   INT
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,cf1:name,cf1:job,cf1:mgr,cf1:hiredate,cf1:sal,cf1:comm,cf1:deptno"
)
TBLPROPERTIES (
    "hbase.table.name" = "baizhi:t_employee"
);
0: jdbc:hive2://CentOS:10000> select empno,ename,sal,deptno from t_employee_hbase;
+--------+-----------+-------+---------+--+
| empno | ename | sal | deptno |
+--------+-----------+-------+---------+--+
| 1 | zhangsan | NULL | 10 |
| 2 | lisi | 5000 | 20 |
+--------+-----------+-------+---------+--+
2 rows selected (0.447 seconds)
0: jdbc:hive2://CentOS:10000> select deptno,count(*) from t_employee_hbase group by deptno;
以上操作都是读操作,因此可以正常运行。但当用户执行写操作、将数据写入HBase时(例如下面的语句):
0: jdbc:hive2://CentOS:10000> insert overwrite table t_employee_hbase select empno,ename,job,mgr,hiredate,sal,comm,deptno from t_employee;
系统会抛出以下错误。
ERROR : Ended Job = job_1578881006622_0016 with errors
Error: Error while processing statement: FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask (state=08S01,code=2)
0: jdbc:hive2://CentOS:10000>
解决:需要替换hive-hbase-handler-1.2.2.jar
详情
1.准备数据
7369,SMITH,CLERK,7902,1980-12-17 00:00:00,800,\N,20
7499,ALLEN,SALESMAN,7698,1981-02-20 00:00:00,1600,300,30
7521,WARD,SALESMAN,7698,1981-02-22 00:00:00,1250,500,30
7566,JONES,MANAGER,7839,1981-04-02 00:00:00,2975,\N,20
7654,MARTIN,SALESMAN,7698,1981-09-28 00:00:00,1250,1400,30
7698,BLAKE,MANAGER,7839,1981-05-01 00:00:00,2850,\N,30
7782,CLARK,MANAGER,7839,1981-06-09 00:00:00,2450,\N,10
7788,SCOTT,ANALYST,7566,1987-04-19 00:00:00,1500,\N,20
7839,KING,PRESIDENT,\N,1981-11-17 00:00:00,5000,\N,10
7844,TURNER,SALESMAN,7698,1981-09-08 00:00:00,1500,0,30
7876,ADAMS,CLERK,7788,1987-05-23 00:00:00,1100,\N,20
7900,JAMES,CLERK,7698,1981-12-03 00:00:00,950,\N,30
7902,FORD,ANALYST,7566,1981-12-03 00:00:00,3000,\N,20
7934,MILLER,CLERK,7782,1982-01-23 00:00:00,1300,\N,10
2.在hive中创建t_emp表
-- Plain-text staging table for the comma-separated employee rows.
-- One record per line; fields are split on ','.
CREATE TABLE t_emp (
    empno    INT,
    ename    STRING,
    job      STRING,
    mgr      INT,
    hiredate TIMESTAMP,
    sal      DECIMAL(7,2),
    comm     DECIMAL(7,2),
    deptno   INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '|'
    MAP KEYS TERMINATED BY '>'
    LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
3.加载数据到t_emp表
-- Load the local file into t_emp, replacing whatever the table currently holds.
LOAD DATA LOCAL INPATH "/root/baizhi/t_emp"
OVERWRITE INTO TABLE t_emp;
4.连接HBase在Hbase中建表
hbase(main):009:0> create_namespace 'tangc'
0 row(s) in 0.0550 seconds
hbase(main):010:0> create 'tangc:t_employee','cf1','cf2'
0 row(s) in 1.3070 seconds
=> Hbase::Table - tangc:t_employee
5.连接Hive建立hbase的映射表
-- Hive external table mapped onto the HBase table "tangc:t_employee".
-- Mapping order follows the column list: empno is the HBase row key (:key),
-- every other column is stored under column family cf1.
CREATE EXTERNAL TABLE t_employee (
    empno    INT,
    ename    STRING,
    job      STRING,
    mgr      INT,
    hiredate TIMESTAMP,
    sal      DECIMAL(7,2),
    comm     DECIMAL(7,2),
    deptno   INT
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
    "hbase.columns.mapping" = ":key,cf1:name,cf1:job,cf1:mgr,cf1:hiredate,cf1:sal,cf1:comm,cf1:deptno"
)
TBLPROPERTIES (
    "hbase.table.name" = "tangc:t_employee"
);
6.执行SQL将结果迁移到Hbase中
-- Switch to the tangc database, then copy every t_emp row into t_employee.
USE tangc;
INSERT OVERWRITE TABLE t_employee
SELECT
    empno,
    ename,
    job,
    mgr,
    hiredate,
    sal,
    comm,
    deptno
FROM t_emp;
Diagnostic Messages for this Task:
Error: java.lang.RuntimeException: java.lang.NoSuchMethodError: org.apache.hadoop.hbase.client.Put.setDurability(Lorg/apache/hadoop/hbase/client/Durability;)V
at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:172)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:459)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:177)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:171)
Caused by: java.lang.NoSuchMethodError: org.apache.hadoop.hbase.client.Put.setDurability(Lorg/apache/hadoop/hbase/client/Durability;)V
at org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat$MyRecordWriter.write(HiveHBaseTableOutputFormat.java:142)
at org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat$MyRecordWriter.write(HiveHBaseTableOutputFormat.java:117)
at org.apache.hadoop.hive.ql.io.HivePassThroughRecordWriter.write(HivePassThroughRecordWriter.java:40)
at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:753)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:838)
at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:88)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:838)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:97)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:162)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:508)
at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:163)
... 8 more
FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
MapReduce Jobs Launched:
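出现以上问题,原因是Hive和Hbase的兼容性问题:Hive 1.2.2自带的hive-hbase-handler是针对旧版HBase编译的,它调用的是返回void的Put.setDurability(Durability)方法(即异常信息中的方法签名),而当前HBase版本的jar中不存在该签名的方法,因此抛出NoSuchMethodError。需要基于当前使用的HBase版本重新编译hbase-handler源码。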
解决
- 创建一个Maven项目,添加以下maven依赖
<!-- POM fragment for rebuilding hive-hbase-handler locally; it belongs inside the <project> element. -->
<!-- With these coordinates, Maven's default jar name is hive-hbase-handler-1.0-SNAPSHOT.jar. -->
<groupId>com.baizhi</groupId>
<artifactId>hive-hbase-handler</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Hive 1.2.2 modules that the handler sources are compiled against. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.2.2</version>
</dependency>
<!-- Hadoop 2.9.2 common library. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.9.2</version>
</dependency>
<!-- HBase 1.2.4 client and server APIs. -->
<!-- NOTE(review): presumably this must match the HBase version running on the cluster; confirm. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.4</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.2.4</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiles the sources at Java 6 source/target level. -->
<!-- NOTE(review): confirm that the JDK in use and the Hive/HBase sources accept this level. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>6</source>
<target>6</target>
</configuration>
</plugin>
</plugins>
</build>
- 将hbase-handler源码拷贝到项目的源码目录下(Maven默认的源码目录为src/main/java):即apache-hive-1.2.2-src.tar.gz中hbase-handler模块的src/java目录下的org目录。
- 执行mvn package指令,打包生成hive-hbase-handler-1.0-SNAPSHOT.jar(文件名由上面的artifactId和version决定),将其重命名为hive-hbase-handler-1.2.2.jar后,替换HIVE_HOME/lib下原有的hive-hbase-handler-1.2.2.jar。
结果
[root@Hbase ~]# hive -f init.sql
Logging initialized using configuration in jar:file:/usr/apache-hive-1.2.2-bin/lib/hive-common-1.2.2.jar!/hive-log4j.properties
OK
Time taken: 6.131 seconds
Query ID = root_20200113200659_264c2b75-b247-4fb3-a862-43754a122eeb
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1578906385519_0010, Tracking URL = http://Hbase:8088/proxy/application_1578906385519_0010/
Kill Command = /usr/hadoop-2.9.2/bin/hadoop job -kill job_1578906385519_0010
Hadoop job information for Stage-0: number of mappers: 1; number of reducers: 0
2020-01-13 20:07:25,475 Stage-0 map = 0%, reduce = 0%
2020-01-13 20:07:42,759 Stage-0 map = 100%, reduce = 0%, Cumulative CPU 4.37 sec
MapReduce Total cumulative CPU time: 4 seconds 370 msec
Ended Job = job_1578906385519_0010
MapReduce Jobs Launched:
Stage-Stage-0: Map: 1 Cumulative CPU: 4.37 sec HDFS Read: 12236 HDFS Write: 0 SUCCESS
Total MapReduce CPU Time Spent: 4 seconds 370 msec
OK
Time taken: 45.93 seconds