需求:将学生表中的部分列数据导入到另一张表中
学生表的数据参考我的上一篇博客 https://blog.csdn.net/fanghailiang2016/article/details/107218724
Mapper 类负责将原始表的每一行按照需求转换成 Put 对象
/**
 * Mapper that copies the {@code info:name} cell(s) of each scanned row into a {@link Put}
 * keyed by the same row key, and emits that Put for writing to the output table.
 *
 * <p>Rows that contain no {@code info:name} cell are skipped: an empty Put carries no
 * columns and is rejected by the HBase client when it is written.
 */
public class TestHbaseMapper extends TableMapper<ImmutableBytesWritable, Put> {

    /** Column family of the cell to copy, encoded once instead of per cell. */
    private static final byte[] FAMILY = Bytes.toBytes("info");

    /** Column qualifier of the cell to copy, encoded once instead of per cell. */
    private static final byte[] QUALIFIER = Bytes.toBytes("name");

    /**
     * Builds a Put for the current row containing only its {@code info:name} cells.
     *
     * @param key     row key of the scanned row
     * @param value   scan result holding the row's cells
     * @param context MapReduce context used to emit the (row key, Put) pair
     * @throws IOException          if adding a cell to the Put or writing the output fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        Put put = new Put(key.get());
        for (Cell cell : value.rawCells()) {
            // Compare family and qualifier bytes in place; no array copy or String decoding.
            if (CellUtil.matchingColumn(cell, FAMILY, QUALIFIER)) {
                put.add(cell);
            }
        }
        // Nothing matched for this row: emitting an empty Put would fail the write.
        if (put.isEmpty()) {
            return;
        }
        context.write(key, put);
    }
}
driver类整合map和reduce
/**
 * Driver that configures and submits the MapReduce job copying the {@code info:name}
 * column from table {@code test:student} into table {@code test:student2}.
 */
public class TestHbaseDriver extends Configured implements Tool {

    /** Column family read from the source table. */
    private static final byte[] FAMILY =
            "info".getBytes(java.nio.charset.StandardCharsets.UTF_8);

    /** Column qualifier read from the source table. */
    private static final byte[] QUALIFIER =
            "name".getBytes(java.nio.charset.StandardCharsets.UTF_8);

    /**
     * Sets up the table mapper and table output, then runs the job to completion.
     *
     * @param args command-line arguments (unused)
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if the job cannot be configured or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        Scan scan = new Scan();
        // Only info:name is consumed by the mapper, so fetch only that column
        // instead of shipping every column of every row to the map tasks.
        scan.addColumn(FAMILY, QUALIFIER);
        // A one-pass full-table scan should not fill the region servers' block cache.
        scan.setCacheBlocks(false);

        Job job = Job.getInstance(this.getConf(), "import_hbase_data");
        job.setJarByClass(TestHbaseDriver.class);

        TableMapReduceUtil.initTableMapperJob(
                "test:student",
                scan,
                TestHbaseMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job
        );
        // No reducer class: this call is used to configure the table output for the job.
        TableMapReduceUtil.initTableReducerJob(
                "test:student2",
                null,
                job
        );
        // With no reducer logic, run map-only so the Puts are written directly
        // from the mappers without a shuffle/sort phase.
        job.setNumReduceTasks(0);

        return job.waitForCompletion(true) ? 0 : 1;
    }
}
main方法:
/**
 * Entry point: runs {@code TestHbaseDriver} through {@code ToolRunner} with an HBase
 * configuration and exits with the driver's status code.
 *
 * <p>If the run throws, the stack trace is printed and the process exits with status 1,
 * so that callers (shell scripts, schedulers) can detect the failure.
 *
 * @param args command-line arguments forwarded to the driver
 */
public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    int status;
    try {
        status = ToolRunner.run(conf, new TestHbaseDriver(), args);
    } catch (Exception e) {
        e.printStackTrace();
        // Previously execution fell through here and the JVM exited with status 0.
        status = 1;
    }
    System.exit(status);
}