自定义HBase-MapReduce2——从HBase表中读取数据,经过MR处理后,再写入一个新的HBase表中
1)需求分析
从fruit表中抽取info:name列的数据,写入fruit2表中(fruit2表需提前创建,且包含info列族)
2)编写Mapper
package com.yingzi.mr2;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
/**
 * Mapper that reads rows from the source HBase table and, for each row, emits a
 * {@link Put} containing only that row's {@code info:name} cells.
 *
 * <p>The output key is the incoming row key; the output value is the {@link Put}
 * built for that row. Rows without an {@code info:name} cell emit nothing.
 *
 * @author 影子
 * @create 2022-01-31-15:34
 */
public class Fruit2Mapper extends TableMapper<ImmutableBytesWritable, Put> {

    /** Column family of the column to copy. */
    private static final byte[] FAMILY = Bytes.toBytes("info");

    /** Qualifier of the column to copy. */
    private static final byte[] QUALIFIER = Bytes.toBytes("name");

    /**
     * Builds a {@link Put} for one scanned row and writes it to the context.
     *
     * @param key     row key of the scanned row
     * @param value   all cells returned by the scan for this row
     * @param context MapReduce context used to emit the (row key, Put) pair
     * @throws IOException          if a cell cannot be added to the Put or the write fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // Respect the writable's offset/length instead of assuming the backing
        // array contains exactly the row key.
        Put put = new Put(key.get(), key.getOffset(), key.getLength());

        for (Cell cell : value.rawCells()) {
            // Compare family and qualifier in place (no per-cell array copy or
            // String decoding); a "name" column in another family is skipped.
            if (CellUtil.matchingColumn(cell, FAMILY, QUALIFIER)) {
                put.add(cell);
            }
        }

        // HBase rejects a Put that carries no columns, so skip rows that had
        // no info:name cell instead of failing the task.
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}
3)编写Reducer
package com.yingzi.mr2;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
/**
 * Reducer that forwards every {@link Put} it receives to the output table unchanged.
 *
 * <p>The output key is always {@link NullWritable}; the incoming row key is not
 * used when writing.
 *
 * @author 影子
 * @create 2022-01-31-15:34
 */
public class Fruit2Reducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {

    /**
     * Writes each {@link Put} grouped under {@code key} to the context.
     *
     * @param key     row key the Puts were grouped by (not used for output)
     * @param values  Puts emitted by the mapper for this key
     * @param context MapReduce context that receives the Puts
     * @throws IOException          if a write fails
     * @throws InterruptedException if a write is interrupted
     */
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        final NullWritable outputKey = NullWritable.get();
        final java.util.Iterator<Put> pending = values.iterator();
        while (pending.hasNext()) {
            context.write(outputKey, pending.next());
        }
    }
}
4)编写Driver
package com.yingzi.mr2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Driver that configures and submits the MapReduce job copying data from the
 * {@code fruit} table to the {@code fruit2} table, using {@code Fruit2Mapper}
 * and {@code Fruit2Reducer}.
 *
 * @author 影子
 * @create 2022-01-31-15:35
 */
public class Fruit2Driver implements Tool {

    /** Configuration injected through {@link #setConf(Configuration)}. */
    Configuration configuration;

    /**
     * Builds the job, runs it and waits for completion.
     *
     * @param strings command-line arguments (unused)
     * @return {@code 0} if the job completed successfully, {@code 1} otherwise
     * @throws Exception if the job cannot be created, configured or submitted
     */
    @Override
    public int run(String[] strings) throws Exception {
        // 1. Create the job from the injected configuration.
        Job job = Job.getInstance(getConf());

        // 2. Locate the job jar through the driver class.
        job.setJarByClass(Fruit2Driver.class);

        // 3. Configure the mapper: source table, scan, and output key/value types.
        Scan scan = new Scan();
        // A one-pass full-table scan gains nothing from the region server
        // block cache and would evict data useful to other readers.
        scan.setCacheBlocks(false);
        TableMapReduceUtil.initTableMapperJob("fruit",
                scan,
                Fruit2Mapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job);

        // 4. Configure the reducer and the destination table.
        TableMapReduceUtil.initTableReducerJob("fruit2",
                Fruit2Reducer.class,
                job);

        // 5. Submit and block until the job finishes.
        return job.waitForCompletion(true) ? 0 : 1;
    }

    @Override
    public void setConf(Configuration configuration) {
        this.configuration = configuration;
    }

    @Override
    public Configuration getConf() {
        return configuration;
    }

    /**
     * Entry point: runs the driver through {@link ToolRunner} and exits with
     * the job's status so callers (shell scripts, schedulers) can detect failure.
     *
     * @param args command-line arguments forwarded to {@link ToolRunner}
     */
    public static void main(String[] args) {
        int exitCode;
        try {
            exitCode = ToolRunner.run(new Configuration(), new Fruit2Driver(), args);
        } catch (Exception e) {
            e.printStackTrace();
            // An exception during setup or submission is a failed run.
            exitCode = 1;
        }
        System.exit(exitCode);
    }
}
5)本地连接HBase
在resources目录下配置hbase-site.xml文件
文件内容与虚拟机上的/opt/module/hbase-1.3.1/conf/hbase-site.xml保持一致
6)检验
运行Driver程序,查看“fruit2”表信息如下: