1、自定义MapReduce
目标:编写 MapReduce 程序,读取集群上的 fruit.tsv 文件,并将数据最终保存到 HBase 的 fruit 表中
注意:需要提前在HBase上把表创建好
流程:
1、编写Mapper类,将读取到的内容传到Reducer中
package com.zch.hbase.mr1;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
* Author: zhaoHui
* Date: 2022/01/14
* Time: 16:30
* Description: 读取hdfs数据传到HBase的fruit表中
*/
/**
 * Identity mapper: forwards each input line from HDFS unchanged.
 *
 * <p>The key is the byte offset of the line within the file and the value is
 * the raw line text; both are passed straight through to the reducer, which
 * performs all of the parsing and HBase Put construction.
 */
public class FruitMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // No transformation here — emit the record exactly as it was read.
        context.write(offset, line);
    }
}
2、编写Reducer类,将传入的数据进行处理,赋值给Put对象,将其写出
package com.zch.hbase.mr1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import java.io.IOException;
/**
* Author: zhaoHui
* Date: 2022/01/14
* Time: 16:30
* Description:
*/
/**
 * Reducer that converts each input line into an HBase {@link Put} and writes
 * it to the output table configured by the driver.
 *
 * <p>Expected line format (tab-separated, matching the fruit.tsv input file):
 * {@code rowKey<TAB>name<TAB>color}. Each parsed line becomes one Put with two
 * cells in the {@code info} column family: {@code info:name} and
 * {@code info:color}.
 */
public class FruitReducer extends TableReducer<LongWritable, Text, NullWritable> {

    @Override
    protected void reduce(LongWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // 1. Iterate over all lines grouped under this key (file offset).
        for (Text value : values) {
            // 2. Parse the line. The input is a .tsv file, so fields are
            //    separated by tabs — splitting on a space would leave the
            //    whole line in split[0] and crash on split[1] below.
            String[] fields = value.toString().split("\t");
            // Skip blank or malformed lines instead of throwing
            // ArrayIndexOutOfBoundsException and killing the task.
            if (fields.length < 3) {
                continue;
            }
            String rowKey = fields[0];
            String name = fields[1];
            String color = fields[2];
            // 3. Build the Put object keyed by the row key.
            Put put = new Put(Bytes.toBytes(rowKey));
            // 4. Populate the info:name and info:color cells.
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("name"), Bytes.toBytes(name));
            put.addColumn(Bytes.toBytes("info"), Bytes.toBytes("color"), Bytes.toBytes(color));
            // 5. Emit the Put; the row key is carried inside the Put itself,
            //    so the output key can be NullWritable.
            context.write(NullWritable.get(), put);
        }
    }
}
3、编写Driver类,创建Job对象,设置驱动类,TableMapReduceUtil设置输出到的表名,FileInputFormat设置输入路径
package com.zch.hbase.mr1;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
* Author: zhaoHui
* Date: 2022/01/14
* Time: 16:30
* Description: 打包mr程序到集群运行
* [root@zhaohui01 hbase-1.3.1]# yarn jar hbase-1.0-SNAPSHOT.jar com.zch.hbase.mr1.FruitDriver /Fruit/fruit.tsv fruit
*/
public class FruitDriver implements Tool {
private Configuration configuration = null;
public int run(String[] args) throws Exception {
// 1、获取Job对象
Job job = Job.getInstance(configuration);
// 2、设置驱动类
job.setJarByClass(FruitDriver.class);
// 3、设置Mapper&Mapper的输出类型
job.setMapperClass(FruitMapper.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
// 4、设置Reducer类
TableMapReduceUtil.initTableReducerJob(args[1], FruitReducer.class, job);
// 5、设置输入输出参数
FileInputFormat.setInputPaths(job, new Path(args[0]));
// 6、提交任务
boolean b = job.waitForCompletion(true);
return b ? 0 : 1;
}
public void setConf(Configuration conf) {
configuration = conf;
}
public Configuration getConf() {
return configuration;
}
public static void main(String[] args) {
try {
Configuration configuration = new Configuration();
int run = ToolRunner.run(configuration, new FruitDriver(), args);
System.exit(run);
} catch (Exception e) {
e.printStackTrace();
}
}
}