Mapper class
package org.nanxiuzi.hbase_demo.mr.read;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class readStudentMapper extends TableMapper<ImmutableBytesWritable, Put> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // Rebuild the row under the same row key as the source table.
        Put put = new Put(key.get());
        // Keep only the name and addr cells; drop everything else.
        for (Cell cell : value.rawCells()) {
            String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));
            if ("name".equals(qualifier) || "addr".equals(qualifier)) {
                put.add(cell);
            }
        }
        // Guard against rows that matched nothing: writing an empty Put fails.
        if (!put.isEmpty()) {
            context.write(key, put);
        }
    }
}
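The qualifier check above runs only after whole rows have already been shipped to the mapper. An alternative worth knowing, shown as a minimal sketch below (assuming the source table also uses the info column family, as the target does), is to narrow the Scan in the driver so only the name and addr columns are read at all; the caching values are illustrative, not tuned:

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

// ...inside the driver's run(): restrict the scan instead of filtering in map()
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"));
scan.addColumn(Bytes.toBytes("info"), Bytes.toBytes("addr"));
scan.setCaching(500);       // fetch more rows per RPC during the full-table scan
scan.setCacheBlocks(false); // don't pollute the region server block cache
TableMapReduceUtil.initTableMapperJob(sourceTableName, scan,
        readStudentMapper.class, ImmutableBytesWritable.class, Put.class, job);

With the scan narrowed this way, the per-cell qualifier check in map() becomes redundant.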
Reducer class
package org.nanxiuzi.hbase_demo.mr.read;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.io.NullWritable;

import java.io.IOException;

public class readStudentReducer extends TableReducer<ImmutableBytesWritable, Put, NullWritable> {
    @Override
    protected void reduce(ImmutableBytesWritable key, Iterable<Put> values, Context context)
            throws IOException, InterruptedException {
        // Pure pass-through: forward every Put for this row key to the target table.
        for (Put value : values) {
            context.write(NullWritable.get(), value);
        }
    }
}
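Since this reducer does nothing but forward, the same copy can run as a map-only job. A minimal sketch of that variant in the driver (initTableReducerJob accepts null as the reducer class, and TableOutputFormat then writes the mapper's Puts directly, ignoring the key):

// Map-only variant: skip the shuffle entirely.
TableMapReduceUtil.initTableReducerJob(
        targetTableName,
        null,            // no reducer class
        job);
job.setNumReduceTasks(0);

Skipping the shuffle is usually faster for a straight table-to-table copy; an explicit reducer only earns its keep when per-row aggregation is needed.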
Driver class
package org.nanxiuzi.hbase_demo.mr.read;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class readStudentDriver implements Tool {
    private Configuration configuration = null;

    @Override
    public int run(String[] args) throws Exception {
        String sourceTableName = args[0];
        String targetTableName = args[1];

        Job job = Job.getInstance(configuration);
        job.setJarByClass(readStudentDriver.class);

        // Read the source table: a full-table Scan, one map() call per row.
        TableMapReduceUtil.initTableMapperJob(sourceTableName,
                new Scan(),
                readStudentMapper.class,
                ImmutableBytesWritable.class,
                Put.class,
                job);

        // Write the filtered Puts into the target table.
        TableMapReduceUtil.initTableReducerJob(
                targetTableName,
                readStudentReducer.class,
                job);

        boolean b = job.waitForCompletion(true);
        return b ? 0 : 1;
    }

    @Override
    public void setConf(Configuration conf) {
        configuration = conf;
    }

    @Override
    public Configuration getConf() {
        return configuration;
    }

    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            int run = ToolRunner.run(conf, new readStudentDriver(), args);
            System.exit(run);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
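One caveat in main(): a bare new Configuration() only picks up the HBase connection settings if hbase-site.xml happens to be on the client classpath. A common, safer pattern (a sketch, not a required change) is to start from HBaseConfiguration.create():

import org.apache.hadoop.hbase.HBaseConfiguration;

// HBaseConfiguration.create() layers hbase-site.xml (ZooKeeper quorum, etc.)
// on top of the normal Hadoop configuration.
Configuration conf = HBaseConfiguration.create();
int run = ToolRunner.run(conf, new readStudentDriver(), args);
System.exit(run);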
Create the new HBase table

create 'filestudent2','info'

The target table must have the info column family: put.add(cell) in the mapper keeps each cell's original family, so the writes fail if that family is missing in the destination.
Run the job

yarn jar /home/hrx/readFhbase/hbase_demo-1.0-SNAPSHOT.jar org.nanxiuzi.hbase_demo.mr.read.readStudentDriver filestudent filestudent2

If submission fails with missing HBase classes, exporting HADOOP_CLASSPATH=$(hbase mapredcp) before running yarn jar is the usual fix.
Check the result
hbase(main):006:0> scan 'filestudent2'
ROW                     COLUMN+CELL
 1001                   column=info:addr, timestamp=1617157683187, value=beijing
 1001                   column=info:name, timestamp=1617157683187, value=lixiang
 1002                   column=info:addr, timestamp=1617157687929, value=shanghai
 1002                   column=info:name, timestamp=1617157687929, value=zhaoyun
 1003                   column=info:addr, timestamp=1617157696338, value=xinjiang
 1003                   column=info:name, timestamp=1617157696338, value=zhangqinag
 1005                   column=info:addr, timestamp=1617157692253, value=haerbin
 1005                   column=info:name, timestamp=1617157692253, value=huansdd
 1006                   column=info:addr, timestamp=1617158486534, value=nanning
 1006                   column=info:name, timestamp=1617158486534, value=fengtian
5 row(s)
Took 0.0472 seconds
hbase(main):007:0>