需求:将HBase中user表的列族为info,列名为name和age的数据迁移到另一个表basic中
代码如下
package com.bpf.hbase;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class User2Basic extends Configured implements Tool {
//Mapper Class
public static class ReadUserMapper extends TableMapper<Text, Put> {
private Text mapOututKey = new Text();
@Override
public void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
//get rowkey
String rowkey = Bytes.toString(key.get());
mapOututKey.set(rowkey);
Put put = new Put(key.get());
//iterator
for(Cell cell : value.rawCells()) {
// family:info
if("info".equals(Bytes.toString(CellUtil.cloneFamily(cell)))){
//column:name
if("name".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
put.add(cell);
}
//column:name
if("age".equals(Bytes.toString(CellUtil.cloneQualifier(cell)))) {
put.add(cell);
}
}
}
context.write(mapOututKey, put);
}
}
//Reducer Class
public static class WriteBasicReducer extends TableReducer<Text, Put, ImmutableBytesWritable> {
@Override
protected void reduce(Text key, Iterable<Put> values, Context context) throws IOException, InterruptedException {
for (Put put : values) {
context.write(null, put);
}
}
}
//driver
public int run(String[] arg0) throws Exception {
//create job
Job job = Job.getInstance(this.getConf(),this.getClass().getSimpleName());
//set run job class
job.setJarByClass(this.getClass());
//set job
Scan scan = new Scan();
scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
scan.setCacheBlocks(false); // don't set to true for MR jobs
// set other scan attrs
//set input and mapper
TableMapReduceUtil.initTableMapperJob(
"user", // input table
scan, // Scan instance to control CF and attribute selection
ReadUserMapper.class, // mapper class
Text.class, // mapper output key
Put.class, // mapper output value
job);
//set output and reducer
TableMapReduceUtil.initTableReducerJob(
"basic", // output table
WriteBasicReducer.class, // reducer class
job);
job.setNumReduceTasks(1); // at least one, adjust as required
boolean isSuccess = job.waitForCompletion(true);
return isSuccess ? 0 : 1;
}
public static void main(String[] args) throws Exception {
//get configuration
Configuration conf = HBaseConfiguration.create();
//submit job
int status = ToolRunner.run(conf, new User2Basic(), args);
//exit program
System.exit(status);
}
}
打包后(只打包本项目代码,不需要把 HBase/Hadoop 依赖 jar 一并打进去,集群上已自带)上传到 Hadoop 环境中运行