以下代码使用 MapReduce 任务读取一张 HBase 表, 再将数据写入另一张 HBase 表; 目标表也可以位于另一个 HBase 集群中
Mapper
package com.MRforHbaeToHbase;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
import java.util.List;
/**
 * Table mapper that copies the {@code cf:name} column of each scanned row into a
 * {@link Put} addressed to the same row key.
 *
 * <p>Every Put is emitted under the single {@link NullWritable} key, so the downstream
 * reducer receives all mutations in one group.
 */
public class hbaseMapper extends TableMapper<NullWritable, Put> {

    /** Column family read from the source row and written to the target row. */
    private static final byte[] FAMILY = Bytes.toBytes("cf");

    /** Column qualifier read from the source row and written to the target row. */
    private static final byte[] QUALIFIER = Bytes.toBytes("name");

    /**
     * Emits one Put per input row that carries a {@code cf:name} value.
     *
     * @param key     row key of the scanned row
     * @param value   scan result for that row
     * @param context MapReduce context used to emit the Put
     * @throws IOException          if emitting the Put fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        byte[] name = value.getValue(FAMILY, QUALIFIER);
        if (name == null) {
            // The row has no cf:name cell. Writing a Put for it would fabricate a
            // cell in the target table that does not exist in the source.
            return;
        }

        // copyBytes() detaches the row key from the writable's backing buffer.
        Put put = new Put(key.copyBytes());
        put.addColumn(FAMILY, QUALIFIER, name);

        // Alternative way to select the column: iterate value.rawCells() and call
        // put.add(cell) for every cell whose qualifier is "name". That must happen
        // BEFORE context.write(); changing the Put after it has been emitted either
        // has no effect or risks adding a duplicate cell.
        context.write(NullWritable.get(), put);
    }
}
Reducer
package com.MRforHbaeToHbase;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;
import java.io.IOException;
/**
 * Pass-through table reducer: every {@link Put} received from the map phase is
 * forwarded unchanged to the job's output.
 */
public class hbaseReduce extends TableReducer<NullWritable, Put, NullWritable> {

    /**
     * Writes each incoming Put to the context under a {@link NullWritable} key.
     *
     * @param key     grouping key from the map phase (not used)
     * @param values  Puts grouped under {@code key}
     * @param context MapReduce context the Puts are written to
     * @throws IOException          if writing a Put fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(NullWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        final NullWritable outputKey = NullWritable.get();
        for (final Put mutation : values) {
            context.write(outputKey, mutation);
        }
    }
}
MainJob
package com.MRforHbaeToHbase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import java.io.IOException;
/**
 * Driver for the HBase-to-HBase copy job: scans table {@code stu} with
 * {@code hbaseMapper} and writes the resulting Puts to table {@code stu4}
 * through {@code hbaseReduce}.
 */
public class mainJob {

    /** Table read by the map phase. */
    private static final String SOURCE_TABLE = "stu";

    /** Table written by the reduce phase. */
    private static final String TARGET_TABLE = "stu4";

    /** Value passed to {@link Scan#setCaching(int)}. */
    private static final int SCAN_CACHING = 500;

    /**
     * Configures the job, runs it, and exits with status 0 on success or 1 on failure.
     *
     * @param args command-line arguments (not used)
     * @throws IOException            if job setup or submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(HBaseConfiguration.create(), "xxxx");
        job.setJarByClass(mainJob.class);

        // The mapper is registered through initTableMapperJob below rather than
        // through a separate job.setMapperClass(...) call.
        TableMapReduceUtil.initTableMapperJob(
                SOURCE_TABLE,
                newSourceScan(),
                hbaseMapper.class,
                NullWritable.class,
                Put.class,
                job);

        // Likewise the reducer is registered through initTableReducerJob.
        // TableMapReduceUtil.limitNumReduceTasks(...) can additionally cap the
        // reducer count at the number of regions of the given table.
        //
        // To write into a table on a different HBase cluster, use the longer
        // overload and pass that cluster's quorum address, e.g.:
        //   TableMapReduceUtil.initTableReducerJob("stu4", hbaseReduce.class, job,
        //           null, "zk:2015", null, null);
        TableMapReduceUtil.initTableReducerJob(TARGET_TABLE, hbaseReduce.class, job);

        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }

    /**
     * Builds the scan handed to the map phase. No row range is set here; one can
     * be added on the returned Scan to restrict the data that is read.
     */
    private static Scan newSourceScan() {
        Scan sourceScan = new Scan();
        // Number of rows fetched per scanner round trip.
        sourceScan.setCaching(SCAN_CACHING);
        // The table is read only once, so caching blocks brings no benefit.
        sourceScan.setCacheBlocks(false);
        return sourceScan;
    }
}