1. Environment Setup
- Prepare Hadoop, ZooKeeper, and HBase clusters.
- Add the HBase dependency jars to the Hadoop classpath so that `yarn jar` can resolve the HBase classes: export HADOOP_CLASSPATH=`$HBASE_HOME/bin/hbase mapredcp`
- Upload the following data to HDFS; fields are separated by \t:
1001 zhangsan 19
1002 lisi 29
1003 wangwu 19
1004 xiaoming 29
1005 xiaobai 19
1006 xiaoli 29
1007 xiapeng 19
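Assuming the file is saved locally as stu.txt, it can be uploaded to the path used by the submission command in 2.4:
hdfs dfs -mkdir -p /datas
hdfs dfs -put stu.txt /datas/stu.txt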
- Create the target table in the HBase shell: create "stu","info","info1"
- Maven dependencies:
<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <hbase.version>2.4.0</hbase.version>
    <hadoop.version>3.1.3</hadoop.version>
</properties>
<dependencies>
    <!-- HBase read/write -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <!-- TableMapper/TableReducer and TableMapReduceUtil live here -->
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-mapreduce</artifactId>
        <version>${hbase.version}</version>
    </dependency>
</dependencies>
2. HDFS to HBase
2.1 HdfsToHbaseDriver
package com.hpsk.mr.hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HdfsToHbaseDriver implements Tool {
    // Configuration injected by ToolRunner
    private Configuration configuration = null;

    @Override
    public int run(String[] args) throws Exception {
        // 1. Create the Job object
        Job job = Job.getInstance(this.configuration);
        // 2. Set the driver class
        job.setJarByClass(HdfsToHbaseDriver.class);
        // 3. Set the Mapper and its output key/value types
        job.setMapperClass(HdfsToHbaseMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // 4. Set the Reducer; initTableReducerJob wires up TableOutputFormat
        //    so the reducer writes directly to the HBase table args[1]
        TableMapReduceUtil.initTableReducerJob(args[1],
                HdfsToHbaseReducer.class,
                job);
        // 5. Set the input path (the HDFS file, args[0])
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        // 6. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.configuration = configuration;
    }

    @Override
    public Configuration getConf() {
        return this.configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new HdfsToHbaseDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
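The driver reads the ZooKeeper quorum and other HBase settings from the hbase-site.xml on the classpath. When submitting from a machine without that file, the settings can be supplied programmatically instead; a minimal sketch (the host names are assumptions, taken from the hive-site.xml shown in Section 4):

// requires org.apache.hadoop.hbase.HBaseConfiguration
Configuration configuration = HBaseConfiguration.create();
configuration.set("hbase.zookeeper.quorum", "hadoop102,hadoop103,hadoop104");  // assumed hosts
configuration.set("hbase.zookeeper.property.clientPort", "2181");
int run = ToolRunner.run(configuration, new HdfsToHbaseDriver(), args);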
2.2 HdfsToHbaseMapper
package com.hpsk.mr.hdfs;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class HdfsToHbaseMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Pass each line through unchanged; parsing happens in the reducer
        context.write(key, value);
    }
}
2.3 HdfsToHbaseReducer
package com.hpsk.mr.hdfs;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

public class HdfsToHbaseReducer extends TableReducer<LongWritable, Text, NullWritable> {
    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        for (Text value : values) {
            // 1. Split the line into fields: id, name, age
            String[] fields = value.toString().split("\t");
            // 2. Create a Put keyed by the id field (row key)
            Put put = new Put(Bytes.toBytes(fields[0]));
            // 3. Populate the Put: name -> info:name, age -> info1:age
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(fields[1]));
            put.addColumn(Bytes.toBytes("info1"), Bytes.toBytes("age"), Bytes.toBytes(fields[2]));
            // 4. Emit; TableOutputFormat writes the Put to HBase
            context.write(NullWritable.get(), put);
        }
    }
}
2.4 Submitting the job on YARN
yarn jar myjars/hbase-1.0-SNAPSHOT.jar com.hpsk.mr.hdfs.HdfsToHbaseDriver /datas/stu.txt stu
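Once the job finishes successfully, the imported rows can be inspected from the HBase shell:
scan "stu"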
3. HBase to HBase
3.1 Create the target table in HBase
create "stu1","info"
3.2 HbaseToHbaseDriver
package com.hpsk.mr.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HbaseToHbaseDriver implements Tool {
    private Configuration configuration = null;

    @Override
    public int run(String[] args) throws Exception {
        // 1. Create the Job object
        Job job = Job.getInstance(this.configuration);
        // 2. Set the Mapper: scan the source table args[0] and emit (rowkey, Put)
        TableMapReduceUtil.initTableMapperJob(args[0],
                new Scan(),
                HbaseToHbaseMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job);
        // 3. Set the Reducer: write the Puts into the target table args[1]
        TableMapReduceUtil.initTableReducerJob(args[1],
                HbaseToHbaseReducer.class,
                job);
        // 4. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        return result ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.configuration = configuration;
    }

    @Override
    public Configuration getConf() {
        return this.configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration configuration = new Configuration();
            int run = ToolRunner.run(configuration, new HbaseToHbaseDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
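The bare new Scan() above ships every cell of every row to the mappers, even though the job only keeps info:name. Configuring the Scan in the driver pushes that filtering to the region servers; a sketch (the caching value is an assumption to tune per cluster):

// requires org.apache.hadoop.hbase.util.Bytes
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name")); // fetch only info:name
scan.setCaching(500);        // rows per RPC round trip; assumed value
scan.setCacheBlocks(false);  // keep full scans out of the block cache
TableMapReduceUtil.initTableMapperJob(args[0], scan, HbaseToHbaseMapper.class,
        ImmutableBytesWritable.class, Put.class, job);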
3.3 HbaseToHbaseMapper
package com.hpsk.mr.hbase;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class HbaseToHbaseMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // Create a Put that reuses the source row key
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            // Keep only cells whose qualifier is "name"
            if ("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
                // Copy the cell into the Put as-is
                put.add(cell);
            }
        }
        // Emit only non-empty Puts: writing an empty Put would fail at the sink
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}
3.4 HbaseToHbaseReducer
package com.hpsk.mr.hbase;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class HbaseToHbaseReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
        // Forward each Put unchanged; TableOutputFormat writes it to the target table
        for (Put value : values) {
            context.write(NullWritable.get(), value);
        }
    }
}
3.5 Submitting the job on YARN
yarn jar myjars/hbase-1.0-SNAPSHOT.jar com.hpsk.mr.hbase.HbaseToHbaseDriver stu stu1
4. Hive to HBase
4.1 Integrating Hive with HBase
- Add the following to hive-site.xml so Hive can reach the HBase cluster's ZooKeeper:
<property>
    <name>hive.zookeeper.quorum</name>
    <value>hadoop102,hadoop103,hadoop104</value>
</property>
<property>
    <name>hive.zookeeper.client.port</name>
    <value>2181</value>
</property>
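Depending on the Hive build, the hive-hbase-handler jar and the HBase client jars may also need to be on Hive's classpath. One common, environment-specific way (the path is an assumption):
export HIVE_AUX_JARS_PATH=$HBASE_HOME/lib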
4.2 Create a Hive table mapped to HBase
CREATE TABLE hive_hbase_stu_table(
id int,
name string,
age string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name,info:age")
TBLPROPERTIES ("hbase.table.name" = "hbase_stu_table");
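Since this is a Hive-managed table, the CREATE TABLE also creates the underlying hbase_stu_table in HBase if it does not exist yet, which can be confirmed from the HBase shell:
list
describe "hbase_stu_table"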
4.3 Create the stu staging table in Hive
CREATE TABLE stu(
id int,
name string,
age string
)
row format delimited fields terminated by '\t';
4.4 Load the local data into the stu table
load data local inpath '/opt/datas/stu.txt' into table stu;
Data: the stu.txt file prepared in Section 1.
4.5 Insert the stu table data into hive_hbase_stu_table
A table backed by the HBase storage handler cannot be populated with LOAD DATA; insert through a query instead:
insert into table hive_hbase_stu_table select * from stu;
4.6 View the HBase table data
scan "hbase_stu_table"
5. HBase to Hive
5.1 Create a Hive external table mapped to HBase
- DDL:
CREATE EXTERNAL TABLE relevance_hbase_stu(
id int,
name string,
age string
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name,info:age")
TBLPROPERTIES ("hbase.table.name" = "hbase_stu_table");
- Query the data:
select * from relevance_hbase_stu;
5.2 Computing student age ranges with Hive
- Analyze the data and write the result into a result table:
create table stu_result as
select
    case when cast(age as int) < 20 then '20以下'
         when cast(age as int) < 30 then '20到30'
         when cast(age as int) < 40 then '30到40'
         when cast(age as int) < 50 then '40到50'
         else '50以上' end as age_range,
    count(1) as cnt
from relevance_hbase_stu
group by
    case when cast(age as int) < 20 then '20以下'
         when cast(age as int) < 30 then '20到30'
         when cast(age as int) < 40 then '30到40'
         when cast(age as int) < 50 then '40到50'
         else '50以上' end;
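Repeating the CASE expression in GROUP BY is easy to get out of sync with the SELECT list; an equivalent formulation computes the bucket once in a subquery:

create table stu_result as
select age_range, count(1) as cnt
from (
    select case when cast(age as int) < 20 then '20以下'
                when cast(age as int) < 30 then '20到30'
                when cast(age as int) < 40 then '30到40'
                when cast(age as int) < 50 then '40到50'
                else '50以上' end as age_range
    from relevance_hbase_stu
) t
group by age_range;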
- Check the result:
select * from stu_result;