Using a MapReduce program to read data from HBase and write it out to the HDFS distributed file system

Migrating data from HBase to the HDFS distributed file system
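The job below reads from the HBase table bd2101:emp referenced in the code, so that table must already exist and hold some data. A minimal HBase shell sketch for preparing it (the column family f1 and the sample row are illustrative assumptions; any existing table content works):

# HBase shell (sample data is assumed, not from the original post)
create_namespace 'bd2101'
create 'bd2101:emp', 'f1'
put 'bd2101:emp', '1001', 'f1:name', 'tom'
put 'bd2101:emp', '1001', 'f1:gender', 'male'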

package com.briup.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;
import java.util.Map;
import java.util.NavigableMap;


/**
 * Created by Intellij IDEA.
 *
 * @author zhudezhong
 * @date 2021/8/11 9:45
 */
public class HbaseToHdfs extends Configured implements Tool {

    public static class HbaseToHdfsMapper extends TableMapper<Text, NullWritable> {
        /**
         * @param key     the row key (rowkey) of the HBase row
         * @param value   the entire row of data read from HBase
         * @param context
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {

            StringBuilder sb = new StringBuilder();

            //row key
            String rk = Bytes.toString(key.get());

            sb.append("rk:" + rk).append(",");

            //fetch the whole row from HBase
            //map structure: NavigableMap<column family, NavigableMap<column qualifier, NavigableMap<timestamp, value>>>
            NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> map = value.getMap();

            for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> f : map.entrySet()) {
                //column family
                sb.append("cf:" + Bytes.toString(f.getKey())).append(",");

                //column qualifier
                for (Map.Entry<byte[], NavigableMap<Long, byte[]>> q : f.getValue().entrySet()) {
                    sb.append("qv:" + Bytes.toString(q.getKey())).append(",");

                    //version (timestamp)
                    for (Map.Entry<Long, byte[]> val : q.getValue().entrySet()) {
                        sb.append("VERSION:" + val.getKey()).append(",").append("value:" + Bytes.toString(val.getValue()));
                    }
                }
            }
            context.write(new Text(sb.toString()), NullWritable.get());
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        String output = conf.get("output");
        conf.set("hbase.zookeeper.quorum", "192.168.10.129:2181");

        Job job = Job.getInstance(conf);
        job.setJarByClass(this.getClass());
        job.setJobName("hbase2hdfs");

        //TableMapReduceUtil#initTableMapperJob wires up the map side: the source table, the Scan, the Mapper class, and the Mapper output key/value types
        TableMapReduceUtil.initTableMapperJob("bd2101:emp", new Scan(), HbaseToHdfsMapper.class, Text.class, NullWritable.class, job);

        job.setNumReduceTasks(0);

        TextOutputFormat.setOutputPath(job, new Path(output));
        return job.waitForCompletion(true) ? 0 : -1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new HbaseToHdfs(), args));
    }
}

Package the program into a jar and submit it to the cluster to run:
yarn jar BD2101-1.0-SNAPSHOT-jar-with-dependencies.jar com.briup.hbase.HbaseToHdfs -D output=/user/zhudz
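Because the job is map-only (setNumReduceTasks(0)), the results land in part-m-* files under the output directory. A quick check, assuming the default part file name:

hdfs dfs -cat /user/zhudz/part-m-00000

Each line follows the format the mapper builds, e.g. rk:1001,cf:f1,qv:name,VERSION:1628649900000,value:tom (the row key, qualifier, timestamp, and value here are made up for illustration).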

Migrating data from a file in HDFS into an HBase table

Prepare a person.txt file and upload it to /user/zhudz in HDFS (the upload and table-creation commands are sketched after the sample data below)

vi person.txt

2000,tom,male
3000,jake,female
4000,briup,male
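The file can be pushed to HDFS with the hdfs CLI, and the target table briup:emp with column family f1 (the names used in the code below) has to exist before the job runs. A minimal sketch, assuming those names:

hdfs dfs -put person.txt /user/zhudz

# HBase shell
create_namespace 'briup'
create 'briup:emp', 'f1'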

package com.briup.hbase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * Created by Intellij IDEA.
 *
 * @author zhudezhong
 * @date 2021/8/11 10:23
 */
public class HdfsToHbase extends Configured implements Tool {


    public static class HdfsToHbaseMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

        /**
         * @param key     the map input key: the byte offset of the current line
         * @param value   the map input value: the content of the current line
         * @param context
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            context.write(value, NullWritable.get());
        }
    }

    public static class HdfsToHbaseReducer extends TableReducer<Text, NullWritable, NullWritable> {

        /**
         * @param key     the key output by the mapper: the content of one line
         * @param values  the values output by the mapper: NullWritable placeholders
         * @param context
         * @throws IOException
         * @throws InterruptedException
         */
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            String[] line = key.toString().split(",");

            //use the first field as the row key
            Put put = new Put(Bytes.toBytes(line[0]));
            put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("name"), Bytes.toBytes(line[1]));
            put.addColumn(Bytes.toBytes("f1"), Bytes.toBytes("gender"), Bytes.toBytes(line[2]));

            context.write(NullWritable.get(), put);
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        conf.set("hbase.zookeeper.quorum", "106.14.59.12:2181");

        Job job = Job.getInstance(conf);
        job.setJarByClass(this.getClass());
        job.setJobName("hdfs2hbase");

        job.setMapperClass(HdfsToHbaseMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        TableMapReduceUtil.initTableReducerJob("briup:emp", HdfsToHbaseReducer.class, job);

        TextInputFormat.addInputPath(job, new Path("/user/zhudz/person.txt"));

        return job.waitForCompletion(true) ? 0 : -1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new HdfsToHbase(), args));
    }
}

Package the program into a jar and submit it to the cluster to run:
yarn jar BD2101-1.0-SNAPSHOT-jar-with-dependencies.jar com.briup.hbase.HdfsToHbase
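To verify the import, scan the target table in the HBase shell; each line of person.txt should appear as one row keyed by its first field:

scan 'briup:emp'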
