Using the HBase Java API, we can run MapReduce jobs against HBase, for example using MapReduce to copy the data in one HBase table into another. In this post we work through two hands-on examples. Follow the column 《破茧成蝶——大数据篇》 for more related content~
1. Copying Data from One HBase Table to Another
1.1 Requirements
Copy the data in the xzw:people table into xzw:user.
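Note that TableOutputFormat writes into an existing table but does not create it, so xzw:user must already exist with the same column family layout as the source. A minimal HBase shell sketch, assuming both tables use a single column family named info (the family used in the second example below):
create_namespace 'xzw'
create 'xzw:user', 'info'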
1.2 Implementation
1. Create a ScanDataMapper class that reads the data from xzw:people:
package com.xzw.hbase_mr;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;

import java.io.IOException;

/**
 * @author: xzw
 * @create_date: 2021/3/17 13:46
 * @desc: reads the data from the people table
 */
public class ScanDataMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result result, Context context) throws IOException,
            InterruptedException {
        // for every cell of the scanned row, copy its family, qualifier and value into a Put
        Put put = new Put(key.get());
        for (Cell cell : result.rawCells()) {
            put.addColumn(
                    CellUtil.cloneFamily(cell),
                    CellUtil.cloneQualifier(cell),
                    CellUtil.cloneValue(cell)
            );
        }
        context.write(key, put);
    }
}
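Because the mapper clones every cell's family, qualifier, and value into the Put, the copy carries over all columns without the job needing to know the table's schema in advance.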
2. Create an InsertDataReducer class that writes the rows it receives into the xzw:user table:
package com.xzw.hbase_mr;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

/**
 * @author: xzw
 * @create_date: 2021/3/17 13:49
 * @desc: writes the rows read by the mapper into the user table
 */
public class InsertDataReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException,
            InterruptedException {
        // emit every Put; the output format applies it to the target table
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
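The reducer is a pass-through: it emits each Put under a NullWritable key, and the TableOutputFormat wired in by initTableReducerJob (see the next step) applies those Puts to the target table.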
3. Create an HBaseMapperReduceTool class that assembles and runs the job:
package com.xzw.hbase_mr;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;

/**
 * @author: xzw
 * @create_date: 2021/3/17 10:55
 * @desc: assembles and runs the job
 */
public class HBaseMapperReduceTool implements Tool {
    private Configuration conf;

    public int run(String[] strings) throws Exception {
        // create the job from the Configuration handed in by ToolRunner
        Job job = Job.getInstance(getConf());
        job.setJarByClass(HBaseMapperReduceTool.class);
        // mapper: scan the source table xzw:people
        TableMapReduceUtil.initTableMapperJob(
                "xzw:people",
                new Scan(),
                ScanDataMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job
        );
        // reducer: write into the target table xzw:user
        TableMapReduceUtil.initTableReducerJob(
                "xzw:user",
                InsertDataReducer.class,
                job
        );
        // run the job; the Tool convention is 0 on success, non-zero on failure
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    public Configuration getConf() {
        return conf;
    }
}
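For large source tables, the plain new Scan() can be tuned before it is handed to initTableMapperJob. A commonly recommended sketch (the values are illustrative, not tested against this cluster):
Scan scan = new Scan();
scan.setCaching(500);        // fetch more rows per RPC than the default
scan.setCacheBlocks(false);  // a one-off full scan shouldn't churn the block cache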
4. Create a T2TApplication class as the entry point:
package com.xzw.hbase_mr;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author: xzw
 * @create_date: 2021/3/17 10:53
 * @desc: entry point for the table-to-table copy job
 */
public class T2TApplication {
    public static void main(String[] args) throws Exception {
        // pass an HBase-aware Configuration so the job can find ZooKeeper etc.
        ToolRunner.run(HBaseConfiguration.create(), new HBaseMapperReduceTool(), args);
    }
}
1.3 Testing
1. Package the code and upload the jar to the server.
2. Run the job:
yarn jar hbase.jar com.xzw.hbase_mr.T2TApplication
3. Check the user table: the data has been copied over.
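If the launch fails with ClassNotFoundException on HBase classes, the HBase jars are probably missing from the Hadoop classpath; exporting them first usually fixes it:
export HADOOP_CLASSPATH=$(hbase mapredcp)
To verify from the HBase shell, a simple scan 'xzw:user' should now show the copied rows.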
2. Writing HDFS Data into HBase
2.1 Requirements
Import the user.csv file on HDFS into the xzw:user table in HBase. Each line of the file is one comma-separated record of the form rowkey,name,age, which is the format the mapper below parses.
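For illustration only (the original post showed the actual file in a screenshot, so these values are made up), the file would look like:
1001,zhangsan,23
1002,lisi,24
1003,wangwu,25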
2.2 Implementation
1. Create a ReadDataFromHDFSMapper class that reads the data on HDFS:
package com.xzw.hbase_hdfs;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @author: xzw
 * @create_date: 2021/3/18 9:46
 * @desc: reads the data from HDFS
 */
public class ReadDataFromHDFSMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // each input line is a CSV record: rowkey,name,age
        String[] data = value.toString().split(",");
        String rowkey = data[0];
        String name = data[1];
        String age = data[2];
        // build the output key from the rowkey
        ImmutableBytesWritable immutableBytesWritable = new ImmutableBytesWritable(Bytes.toBytes(rowkey));
        // build the Put: one column per field under the info family
        Put put = new Put(Bytes.toBytes(rowkey));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
        put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("age"), Bytes.toBytes(age));
        context.write(immutableBytesWritable, put);
    }
}
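One caveat as the code stands: a line with fewer than three comma-separated fields throws an ArrayIndexOutOfBoundsException and fails the task. A cheap guard at the top of map(), e.g. if (data.length < 3) return;, would skip malformed lines instead.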
2. Create a WriteDataToHBaseReducer class that writes the rows it reads into the HBase table:
package com.xzw.hbase_hdfs;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

/**
 * @author: xzw
 * @create_date: 2021/3/18 10:07
 * @desc: writes the data into HBase
 */
public class WriteDataToHBaseReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context) throws IOException,
            InterruptedException {
        // write every Put read from HDFS into HBase
        for (Put put : values) {
            context.write(NullWritable.get(), put);
        }
    }
}
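This reducer is functionally identical to InsertDataReducer from the first example; in a real project the two could be collapsed into a single shared class.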
3. Create an HDFSToHBaseTool class that assembles and runs the job:
package com.xzw.hbase_hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;

/**
 * @author: xzw
 * @create_date: 2021/3/18 10:21
 * @desc: assembles and runs the HDFS-to-HBase job
 */
public class HDFSToHBaseTool implements Tool {
    private Configuration conf;

    public int run(String[] strings) throws Exception {
        // create the job from the Configuration handed in by ToolRunner
        // (creating a fresh Configuration here would discard the HBase settings)
        Job job = Job.getInstance(getConf());
        job.setJarByClass(HDFSToHBaseTool.class);
        // input file on HDFS; adjust the path to your own cluster
        Path path = new Path("hdfs://master:9000/xzw/user.csv");
        FileInputFormat.addInputPath(job, path);
        // mapper
        job.setMapperClass(ReadDataFromHDFSMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        // reducer
        TableMapReduceUtil.initTableReducerJob("xzw:user", WriteDataToHBaseReducer.class, job);
        // at least one reducer is needed so the Puts reach the reduce stage
        job.setNumReduceTasks(1);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public void setConf(Configuration configuration) {
        this.conf = configuration;
    }

    public Configuration getConf() {
        return conf;
    }
}
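Strictly speaking, the reducer is optional for a pure load: a map-only job (job.setNumReduceTasks(0)) whose mapper emits its (rowkey, Put) pairs straight to TableOutputFormat is a common alternative; the shuffle here mainly buys grouping and sorting by row key.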
4. Create an H2TApplication class as the entry point:
package com.xzw.hbase_hdfs;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.util.ToolRunner;

/**
 * @author: xzw
 * @create_date: 2021/3/18 13:38
 * @desc: entry point for the HDFS-to-HBase import job
 */
public class H2TApplication {
    public static void main(String[] args) throws Exception {
        // HBaseConfiguration.create() picks up hbase-site.xml from the classpath
        Configuration configuration = HBaseConfiguration.create();
        ToolRunner.run(configuration, new HDFSToHBaseTool(), args);
    }
}
2.3 Testing
1. Package the code and upload the jar to the server. Make sure user.csv has already been uploaded to /xzw/user.csv on HDFS, since that is where the job reads it from.
2. Run the jar:
yarn jar hbase_hdfs.jar com.xzw.hbase_hdfs.H2TApplication
3. Check the user table: the data has been imported.
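From the HBase shell, count 'xzw:user' (or another scan) is a quick sanity check that the expected number of rows arrived.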
That's all for this post. If you ran into any problems along the way, leave a comment and let me know what you hit~