Hbase 也可以做一些MapReduce操作
HBase 的 MapReduce 无非三种:
- HDFS 中的数据 成为 Hbase 的某个表的某一列
- HBase中的某一列 成为HDFS 中的数据
- HBase某一表某列 加工 流入 HBase另一表中某列
实现Demo如下 :
1.创建两个表 插入模板数据
/**
 * Creates the demo tables ("word" and "stat", each with a "content" column
 * family) and seeds the "word" table with five sample rows used as
 * MapReduce input.
 *
 * NOTE(review): setAutoFlushTo/flushCommits are HTable-era calls; on newer
 * HBase versions prefer BufferedMutator — kept here to match the tutorial's
 * target API version.
 */
public class HbaseMR {
    private static Configuration conf;
    private static Connection conn;

    static {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        try {
            conn = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            // Everything below depends on conn; fail fast instead of leaving
            // conn == null and getting an obscure NPE later.
            throw new ExceptionInInitializerError(e);
        }
    }

    /** Builds one Put for the content:info column of the given row key. */
    private static Put buildPut(String rowKey, String text) {
        Put p = new Put(Bytes.toBytes(rowKey));
        p.add("content".getBytes(), "info".getBytes(), text.getBytes());
        return p;
    }

    public static void initTable() {
        Admin admin = null;
        Table table = null;
        try {
            // Create the source table ("word") and the sink table ("stat"),
            // both with a single column family "content".
            admin = conn.getAdmin();
            HTableDescriptor word = new HTableDescriptor(TableName.valueOf("word"));
            HTableDescriptor stat = new HTableDescriptor(TableName.valueOf("stat"));
            HColumnDescriptor content = new HColumnDescriptor("content");
            word.addFamily(content);
            stat.addFamily(content);
            admin.createTable(word);
            admin.createTable(stat);

            // Seed the "word" table; buffer writes client-side and flush once.
            table = conn.getTable(TableName.valueOf("word"));
            table.setAutoFlushTo(false);
            table.setWriteBufferSize(5);
            List<Put> lp = new ArrayList<Put>();
            lp.add(buildPut("1", "The Apache Hadoop software library is a framework"));
            lp.add(buildPut("2", "The common utilities that support the other Hadoop modules"));
            lp.add(buildPut("3", "Hadoop by reading the documentation"));
            lp.add(buildPut("4", "Hadoop from the release page"));
            lp.add(buildPut("5", "Hadoop on the mailing list"));
            table.put(lp);
            table.flushCommits();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Admin and Table hold client resources and must be closed.
            if (table != null) {
                try {
                    table.close();
                } catch (IOException ignored) {
                    // best-effort cleanup; nothing useful to do here
                }
            }
            if (admin != null) {
                try {
                    admin.close();
                } catch (IOException ignored) {
                    // best-effort cleanup; nothing useful to do here
                }
            }
        }
    }
}
2. Mapper 类要继承 TableMapper
/**
 * Reads each row of the source HBase table, extracts the content:info cell,
 * splits it on single spaces, and emits (word, 1) pairs.
 */
public class HbaseMapper extends TableMapper<Text, IntWritable> {

    // Reuse writables across map() calls — standard Hadoop idiom to cut
    // per-record allocations.
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // The row may legitimately lack the content:info cell; skip it
        // instead of crashing the task with an NPE.
        byte[] raw = value.getValue(Bytes.toBytes("content"), Bytes.toBytes("info"));
        if (raw == null) {
            return;
        }
        // Bytes.toString decodes as UTF-8; new String(byte[]) would depend on
        // the platform default charset.
        String line = Bytes.toString(raw);
        for (String s : line.split(" ")) {
            word.set(s);
            context.write(word, ONE);
        }
    }
}
3. Reducer 类要继承 TableReducer
/**
 * Sums the counts for each word and writes the total into the sink table's
 * content:info column, using the word itself as the row key.
 */
public class HbaseReduce extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            // Read the primitive directly; Integer.parseInt(value.toString())
            // is a needless String round-trip.
            sum += value.get();
        }
        byte[] row = Bytes.toBytes(key.toString());
        Put put = new Put(row);
        put.add(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes(String.valueOf(sum)));
        context.write(new ImmutableBytesWritable(row), put);
    }
}
4.重点:Driver类
/**
 * Driver: wires the HBase source table "word" through HbaseMapper/HbaseReduce
 * into the HBase sink table "stat".
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
    Job job = Job.getInstance(conf);
    job.setJarByClass(HbaseDriver.class);
    // Mapper side: source table name, scan, mapper class, map output key/value, job.
    TableMapReduceUtil.initTableMapperJob("word", new Scan(), HbaseMapper.class, Text.class, IntWritable.class, job);
    // Reducer side: sink table name, reducer class, job.
    // (Fixed: original had a "jobTableMapReduceUtil..." typo that does not compile.)
    TableMapReduceUtil.initTableReducerJob("stat", HbaseReduce.class, job);
    // Propagate job failure to the shell instead of always exiting 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}