HBase Basics 02: Integrating HBase with MapReduce

HBase and MapReduce Integration

Maven Dependencies

        <!-- Hadoop HDFS dependencies -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0</version>
        </dependency>
        <!-- Hadoop MapReduce dependencies -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>2.6.0</version>
        </dependency>
        <!-- HBase dependencies -->
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-common</artifactId>
            <version>1.2.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-protocol</artifactId>
            <version>1.2.4</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.4</version>
        </dependency>
        <!-- end of HBase dependencies -->
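
Note: hbase-server is needed on the client classpath here because, in HBase 1.x, the MapReduce integration classes used below (TableMapper, TableReducer, TableMapReduceUtil, TableInputFormat, TableOutputFormat) ship inside the hbase-server artifact. All versions should match the cluster you run against.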
Example Problem

Example: use the MapReduce computation model to calculate the average age of the users in the HBase table zpark:u_user.
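
Before running the job, zpark:u_user needs rows whose cf1:age cells hold 4-byte ints, because the mapper below decodes them with Bytes.toInt. Here is a minimal data-preparation sketch; the class name PrepareData, the row keys, and the sample ages are illustrative assumptions, and the table zpark:u_user with column family cf1 is assumed to already exist.

package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical helper that seeds zpark:u_user with sample rows
public class PrepareData {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set(HConstants.ZOOKEEPER_QUORUM, "hadoop");
        conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("zpark:u_user"))) {
            for (int i = 2; i <= 8; i++) {
                // Sample row keys inside the com:002 .. com:009 range scanned by the job
                Put put = new Put(Bytes.toBytes(String.format("com:%03d", i)));
                // Store age as a 4-byte int so the mapper can read it with Bytes.toInt
                put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("age"), Bytes.toBytes(20 + i));
                table.put(put);
            }
        }
    }
}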

Code Implementation

MyMapper

package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

// TableMapper<KEYOUT, VALUEOUT> fixes the input types: the key is the row key
// (ImmutableBytesWritable) and the value is the row's cells (Result)
public class MyMapper extends TableMapper<Text, IntWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // Read cf1:age from the current row; assumes the cell holds a 4-byte int
        int age = Bytes.toInt(value.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("age")));
        System.out.println("mapper++++++++" + age);
        // Emit every age under the same key so a single reduce call sees all of them
        context.write(new Text("age"), new IntWritable(age));
    }
}
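
One caveat about the mapper: Bytes.toInt expects the cell to contain exactly the 4 bytes produced by Bytes.toBytes(int). If the ages had been inserted through the HBase shell they would be stored as strings, and the mapper would need Integer.parseInt(Bytes.toString(...)) instead.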

MyReduce

package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

// Note: in TableReducer<KEYIN, VALUEIN, KEYOUT> the output value type is fixed
// to Mutation; the output key is ignored when writing to HBase
public class MyReduce extends TableReducer<Text, IntWritable, NullWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        int count = 0;
        for (IntWritable value : values) {
            sum += value.get();
            count++;
        }
        // Cast before dividing, otherwise integer division truncates the average
        double avgAge = (double) sum / count;
        System.out.println(avgAge + "reduce+++++++++++++++++++++++");
        // The row key under which the result is stored in HBase
        Put put = new Put(Bytes.toBytes("result20190320"));
        put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("avgAge"), Bytes.toBytes(avgAge));
        // A Put is a Mutation, which is what TableOutputFormat expects as the value
        context.write(NullWritable.get(), put);
    }
}
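
Because the reducer stores the average as the raw 8 bytes of a double, the HBase shell will only show the result cell as hex. Below is a minimal verification sketch, assuming the job has already run; the class name CheckResult is a made-up helper, not part of the job.

package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

// Hypothetical helper: decodes the binary double written by MyReduce
public class CheckResult {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set(HConstants.ZOOKEEPER_QUORUM, "hadoop");
        conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(TableName.valueOf("zpark:result"))) {
            Result result = table.get(new Get(Bytes.toBytes("result20190320")));
            // Bytes.toDouble reverses the Bytes.toBytes(double) used in the reducer
            double avgAge = Bytes.toDouble(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("avgAge")));
            System.out.println("avgAge = " + avgAge);
        }
    }
}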

InitMR

package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

import java.io.IOException;

public class InitMR {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // HBase registers itself in ZooKeeper, so the client locates the
        // HBase services through the ZooKeeper quorum
        Configuration configuration = HBaseConfiguration.create();
        configuration.set(HConstants.ZOOKEEPER_QUORUM, "hadoop");
        configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");
        // 1. Initialize the MapReduce job
        Job job = Job.getInstance(configuration, "avgAge");
        job.setJarByClass(InitMR.class);
        // 2. Use the table-based input and output formats
        //    (redundant here: the TableMapReduceUtil calls below set them too)
        job.setInputFormatClass(TableInputFormat.class);
        job.setOutputFormatClass(TableOutputFormat.class);
        // 3. Set the data source and the destination of the results
        Scan scan = new Scan();
        scan.setStartRow("com:002".getBytes());
        scan.setStopRow("com:009".getBytes());
        // Scan zpark:u_user and feed each row to MyMapper
        TableMapReduceUtil.initTableMapperJob("zpark:u_user", scan, MyMapper.class, Text.class, IntWritable.class, job);
        // Write the Puts emitted by MyReduce into zpark:result (the table must already exist)
        TableMapReduceUtil.initTableReducerJob("zpark:result", MyReduce.class, job);
        // Submit the job and wait for completion
        job.waitForCompletion(true);
    }
}
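
One more prerequisite: TableOutputFormat writes into an existing table and does not create zpark:result for you. Below is a setup sketch under that assumption, using the HBase 1.x Admin API; the class name CreateResultTable is illustrative.

package com.baizhi.hbase_mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceNotFoundException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

// Hypothetical setup helper: creates the namespace and output table for the job
public class CreateResultTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set(HConstants.ZOOKEEPER_QUORUM, "hadoop");
        conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            // Create the zpark namespace if it does not exist yet
            try {
                admin.getNamespaceDescriptor("zpark");
            } catch (NamespaceNotFoundException e) {
                admin.createNamespace(NamespaceDescriptor.create("zpark").build());
            }
            // Create zpark:result with the cf1 family used by the reducer
            TableName tableName = TableName.valueOf("zpark:result");
            if (!admin.tableExists(tableName)) {
                HTableDescriptor desc = new HTableDescriptor(tableName);
                desc.addFamily(new HColumnDescriptor("cf1"));
                admin.createTable(desc);
            }
        }
    }
}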