HBase02
Integrating HBase with MapReduce
Maven Dependencies
<!-- Hadoop HDFS dependencies -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.6.0</version>
</dependency>
<!-- Hadoop MapReduce dependencies -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-common</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.6.0</version>
</dependency>
<!-- HBase dependencies -->
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>1.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-protocol</artifactId>
    <version>1.2.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.2.4</version>
</dependency>
<!-- end of HBase dependencies -->
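Note that in HBase 1.2.x the MapReduce integration classes used below (TableMapper, TableReducer, TableMapReduceUtil, TableInputFormat, TableOutputFormat) ship in the hbase-server artifact, which is why it is listed here even though this code only acts as a client.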
Sample Problem
Example: use the MapReduce computation model to calculate the average age of the users in the HBase table zpark:u_user.
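The mapper below decodes cf1:age with Bytes.toInt(), so the ages must have been written as 4-byte integers through the Java client (values put from the HBase shell are stored as strings). A minimal seeding sketch, assuming the zpark namespace and the zpark:u_user table (with column family cf1) already exist; the SeedData class name and the sample rows are illustrative:

package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class SeedData {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop"); // same ZooKeeper address as the job below
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("zpark:u_user"))) {
            int[] ages = {18, 22, 25, 30}; // illustrative sample data
            for (int i = 0; i < ages.length; i++) {
                // row keys follow the com:NNN pattern so they fall inside
                // the scan range used by InitMR below
                Put put = new Put(Bytes.toBytes(String.format("com:%03d", i + 2)));
                // write age with Bytes.toBytes(int): the mapper decodes it with Bytes.toInt()
                put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("age"), Bytes.toBytes(ages[i]));
                table.put(put);
            }
        }
    }
}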
Code Implementation
MyMapper
package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

// TableMapper<KEYOUT, VALUEOUT>: the input key/value types are fixed as
// ImmutableBytesWritable (the row key) and Result (the row's cells).
public class MyMapper extends TableMapper<Text, IntWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // read cf1:age, which was written as a 4-byte int
        int age = Bytes.toInt(value.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("age")));
        System.out.println("mapper++++++++" + age);
        // emit every age under the single key "age" so one reduce call sees them all
        context.write(new Text("age"), new IntWritable(age));
    }
}
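One caveat: if a row has no cf1:age cell, getValue returns null and Bytes.toInt throws a NullPointerException, failing the task. A defensive variant of the map body (a sketch; the original example assumes every row has the cell):

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        byte[] raw = value.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("age"));
        if (raw == null || raw.length != Bytes.SIZEOF_INT) {
            return; // skip rows without a well-formed 4-byte age cell
        }
        context.write(new Text("age"), new IntWritable(Bytes.toInt(raw)));
    }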
MyReduce
package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

// Note: in the HBase/MapReduce integration the reducer's output key type is
// ignored; TableReducer always writes a Mutation (here a Put) as the value.
public class MyReduce extends TableReducer<Text, IntWritable, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        int count = 0;
        for (IntWritable value : values) {
            sum += value.get();
            count++;
        }
        // cast before dividing, otherwise integer division truncates the average
        double avgAge = (double) sum / count;
        System.out.println(avgAge + "reduce+++++++++++++++++++++++");
        // the row key under which the result is stored in HBase
        Put put = new Put("result20190320".getBytes());
        put.addColumn("cf1".getBytes(), "avgAge".getBytes(), Bytes.toBytes(avgAge));
        // the Put acts as the output value; the key is unused, so NullWritable fits
        context.write(NullWritable.get(), put);
    }
}
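Because the reducer stores the average with Bytes.toBytes(double), the cell holds 8 raw bytes and looks garbled in the HBase shell; read it back with Bytes.toDouble() instead. A verification sketch (the ReadResult class name is illustrative; same ZooKeeper address as the job):

package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;

public class ReadResult {
    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("zpark:result"))) {
            // fetch the single result row written by the reducer
            Result result = table.get(new Get(Bytes.toBytes("result20190320")));
            byte[] raw = result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("avgAge"));
            System.out.println("avgAge = " + Bytes.toDouble(raw));
        }
    }
}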
InitMR
package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

import java.io.IOException;

public class InitMR {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // HBase registers its services in ZooKeeper, so the connection is
        // bootstrapped through the ZooKeeper quorum
        Configuration configuration = HBaseConfiguration.create();
        configuration.set(HConstants.ZOOKEEPER_QUORUM, "hadoop");
        configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");
        // 1. create the MR job
        Job job = Job.getInstance(configuration, "avgAge");
        job.setJarByClass(InitMR.class);
        // 2. table-based input and output formats (the initTableMapperJob /
        // initTableReducerJob calls below set these as well, so the explicit
        // calls are redundant but harmless)
        job.setInputFormatClass(TableInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        // 3. wire up the source scan, the mapper, and the result table
        Scan scan = new Scan();
        scan.setStartRow("com:002".getBytes());
        scan.setStopRow("com:009".getBytes()); // stop row is exclusive
        TableMapReduceUtil.initTableMapperJob("zpark:u_user", scan, MyMapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("zpark:result", MyReduce.class, job);
        // 4. submit the job and wait for it to finish
        job.waitForCompletion(true);
    }
}
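For larger tables, the HBase reference guide recommends raising scan caching and disabling block caching for MapReduce scans. A tuned version of the Scan above (the caching value of 500 is an illustrative default, not a measured setting):

        Scan scan = new Scan();
        scan.setStartRow("com:002".getBytes());
        scan.setStopRow("com:009".getBytes());
        scan.setCaching(500);        // fetch more rows per RPC round-trip; tune per workload
        scan.setCacheBlocks(false);  // a one-off scan should not evict hot data from the block cache

By default these TableMapReduceUtil helpers also ship the HBase dependency jars with the job, so submitting the packaged jar with hadoop jar normally needs no extra classpath setup.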