1. Why integrate the two
HBase is a reliable, distributed database built on Hadoop, well suited to storing large volumes of structured data. It is a natural fit for the distributed computing framework MapReduce: an HBase table can serve as both the input source and the output sink of a MapReduce job.
2. Integration steps
1. Create a Maven project and add the dependencies
<!-- mapreduce + hbase -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-common</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-core</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-server</artifactId>
    <version>1.2.4</version>
</dependency>
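The test methods in the steps below use shared connection and admin fields whose initialization is not shown; a minimal JUnit 4 setup sketch for the test class (the quorum value is an assumption, mirroring the driver in step 7):

// assumed imports: org.apache.hadoop.conf.Configuration, org.apache.hadoop.hbase.HBaseConfiguration,
// org.apache.hadoop.hbase.HConstants, org.apache.hadoop.hbase.client.{Admin, Connection, ConnectionFactory},
// org.junit.{After, Before}
private Connection connection;
private Admin admin;

@Before
public void setUp() throws IOException {
    Configuration conf = HBaseConfiguration.create();
    // assumed ZooKeeper quorum; matches the driver in step 7
    conf.set(HConstants.ZOOKEEPER_QUORUM, "hadoop:2181");
    connection = ConnectionFactory.createConnection(conf);
    admin = connection.getAdmin();
}

@After
public void tearDown() throws IOException {
    admin.close();
    connection.close();
}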
2. Create the test table
@Test
public void testCreateOrderTable() throws IOException {
    boolean exists = admin.tableExists(TableName.valueOf("t_order"));
    if (exists) {
        // drop the old table so the test starts from a clean slate
        admin.disableTable(TableName.valueOf("t_order"));
        admin.deleteTable(TableName.valueOf("t_order"));
    }
    HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf("t_order"));
    HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
    hTableDescriptor.addFamily(cf1);
    admin.createTable(hTableDescriptor);
}
3. Insert test data
@Test
public void testInsert() throws IOException {
    Table table = connection.getTable(TableName.valueOf("t_order"));
    // rowkey convention: userId:orderTimestamp
    Put put1 = new Put(Bytes.toBytes("1:20181010153020100"));
    put1.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(2500.0D));
    put1.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("p20"));
    put1.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put2 = new Put(Bytes.toBytes("2:20180510121011233"));
    put2.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(199.0D));
    put2.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("连衣裙"));
    put2.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put3 = new Put(Bytes.toBytes("3:20180612111111111"));
    put3.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(999.9D));
    put3.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("小天鹅洗衣机"));
    put3.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put4 = new Put(Bytes.toBytes("1:20181212011011111"));
    put4.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(200.0D));
    put4.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("搓衣板"));
    put4.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put5 = new Put(Bytes.toBytes("1:20190206101010101"));
    put5.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(10D));
    put5.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("钢丝球"));
    put5.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    Put put6 = new Put(Bytes.toBytes("2:20180306101010101"));
    put6.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("money"), Bytes.toBytes(9.9D));
    put6.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("product"), Bytes.toBytes("丝袜"));
    put6.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("count"), Bytes.toBytes(1));
    ArrayList<Put> puts = new ArrayList<Put>();
    puts.add(put1);
    puts.add(put2);
    puts.add(put3);
    puts.add(put4);
    puts.add(put5);
    puts.add(put6);
    table.put(puts);
    table.close();
}
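The rowkey convention userId:orderTimestamp matters here: the driver's RowFilter in step 7 relies on it to select 2018 orders. To confirm the rows landed, a quick verification sketch (the method name is mine):

@Test
public void testScanOrders() throws IOException {
    Table table = connection.getTable(TableName.valueOf("t_order"));
    ResultScanner scanner = table.getScanner(new Scan());
    for (Result r : scanner) {
        String rowkey = Bytes.toString(r.getRow());
        String product = Bytes.toString(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("product")));
        double money = Bytes.toDouble(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("money")));
        System.out.println(rowkey + " " + product + " " + money);
    }
    scanner.close();
    table.close();
}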
4. Create the output table
// create the output table that will hold the MapReduce results
@Test
public void testCreateResultTable() throws IOException {
    boolean exists = admin.tableExists(TableName.valueOf("t_result"));
    if (exists) {
        admin.disableTable(TableName.valueOf("t_result"));
        admin.deleteTable(TableName.valueOf("t_result"));
    }
    HTableDescriptor hTableDescriptor = new HTableDescriptor(TableName.valueOf("t_result"));
    HColumnDescriptor cf1 = new HColumnDescriptor("cf1");
    hTableDescriptor.addFamily(cf1);
    admin.createTable(hTableDescriptor);
}
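Steps 2 and 4 are identical apart from the table name; they could share a helper like the following sketch (the method name is hypothetical):

// hypothetical helper: drop and recreate a table with a single cf1 family
private void recreateTable(String name) throws IOException {
    TableName tableName = TableName.valueOf(name);
    if (admin.tableExists(tableName)) {
        admin.disableTable(tableName);
        admin.deleteTable(tableName);
    }
    HTableDescriptor descriptor = new HTableDescriptor(tableName);
    descriptor.addFamily(new HColumnDescriptor("cf1"));
    admin.createTable(descriptor);
}

With it, each test body reduces to recreateTable("t_order") or recreateTable("t_result").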
5. Write the custom Mapper
package com.hw.hm;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

/**
 * @author fql
 * @date 2019/8/22 12:59
 * Custom Mapper: emits one (userId, order amount) pair per row.
 */
public class OrderMapper extends TableMapper<Text, DoubleWritable> {
    /**
     * @param key     the rowkey
     * @param result  one row from HBase
     * @param context the MapReduce context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(ImmutableBytesWritable key, Result result, Context context) throws IOException, InterruptedException {
        // read the rowkey
        String rowkey = Bytes.toString(key.get());
        // the rowkey has the form userId:orderTimestamp; the userId is the part before the colon
        String userId = rowkey.split(":")[0];
        // read the order amount
        double money = Bytes.toDouble(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("money")));
        context.write(new Text(userId), new DoubleWritable(money));
    }
}
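TableMapper fixes the input types to ImmutableBytesWritable (the rowkey) and Result (the row), so only the map output types are declared. Note that the mapper assumes every row carries a cf1:money cell; Bytes.toDouble would fail with a NullPointerException otherwise. A defensive variant of the value read, as a sketch:

byte[] raw = result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("money"));
if (raw == null) {
    return; // skip rows that have no money cell
}
double money = Bytes.toDouble(raw);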
6. Write the custom Reducer
package com.hw.hm;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;

import java.io.IOException;

/**
 * @author fql
 * @date 2019/8/22 13:07
 * Custom Reducer: sums the order amounts per user and writes the total back to HBase.
 */
public class OrderReducer extends TableReducer<Text, DoubleWritable, NullWritable> {
    /**
     * @param key     the userId
     * @param values  the individual order amounts for this user
     * @param context the MapReduce context
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
        double sumMoney = 0D;
        // accumulate the order amounts
        for (DoubleWritable value : values) {
            sumMoney += value.get();
        }
        // result rowkey: userId:2018
        Put put = new Put(Bytes.toBytes(key.toString() + ":2018"));
        put.addColumn(Bytes.toBytes("cf1"), Bytes.toBytes("total"), Bytes.toBytes(sumMoney));
        // TableOutputFormat only writes the Put, so the key can be NullWritable
        context.write(NullWritable.get(), put);
    }
}
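Given the test data from step 3 and the 2018 row filter configured in the driver (step 7 below), the totals written to t_result should work out as follows; user 1's 2019 order (10.0) is filtered out:
1:2018 -> 2500.0 + 200.0 = 2700.0
2:2018 -> 199.0 + 9.9 = 208.9
3:2018 -> 999.9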
7. Write the Application
package com.hw.hm;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

import java.io.IOException;

/**
 * @author fql
 * @date 2019/8/22 13:14
 * Driver class: wires the Mapper and Reducer into a job.
 */
public class OrderApplication {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = HBaseConfiguration.create();
        configuration.set(HConstants.ZOOKEEPER_QUORUM, "hadoop:2181");
        Job job = Job.getInstance(configuration, "order");
        job.setJarByClass(OrderApplication.class);
        // set up the map side: scan t_order, keeping only 2018 orders
        Scan scan = new Scan();
        // the rowkey is userId:orderTimestamp, so rows containing ":2018" are 2018 orders
        RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator("^.*:2018.*$"));
        scan.setFilter(filter);
        // these helpers also set TableInputFormat/TableOutputFormat, so no explicit
        // setInputFormatClass/setOutputFormatClass calls are needed
        TableMapReduceUtil.initTableMapperJob(TableName.valueOf("t_order"), scan, OrderMapper.class, Text.class, DoubleWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("t_result", OrderReducer.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
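One note on the ZooKeeper address: embedding the port in the quorum string works in many setups, but the host and client port can also be set separately, which is the more common form; an equivalent sketch (the host name hadoop comes from the original):

configuration.set(HConstants.ZOOKEEPER_QUORUM, "hadoop");
configuration.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181");

When the job is submitted with hadoop jar, initTableMapperJob is expected to ship the HBase dependency jars with the job (its addDependencyJars flag defaults to true in this overload), so the HBase jars normally need no manual classpath handling.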
8. Write a method to view the result
@Test
public void testGetResult() throws IOException {
    Table table = connection.getTable(TableName.valueOf("t_result"));
    // point lookup of the result row for user 1 in 2018
    Result result = table.get(new Get(Bytes.toBytes("1:2018")));
    double total = Bytes.toDouble(result.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("total")));
    System.out.println("Total for user 1 in 2018: " + total);
}
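The Get above looks up a single row. To list every user's total, a full-scan sketch over t_result (the method name is mine):

@Test
public void testScanResults() throws IOException {
    Table table = connection.getTable(TableName.valueOf("t_result"));
    ResultScanner scanner = table.getScanner(new Scan());
    for (Result r : scanner) {
        String rowkey = Bytes.toString(r.getRow());
        double total = Bytes.toDouble(r.getValue(Bytes.toBytes("cf1"), Bytes.toBytes("total")));
        System.out.println(rowkey + " total: " + total);
    }
    scanner.close();
    table.close();
}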