Hbase 也可以做一些MapReduce操作
HBase 的 MapReduce 无非三种:
- HDFS 中的数据 成为 Hbase 的某个表的某一列
- HBase中的某一列 成为HDFS 中的数据
- HBase某一表某列 加工 流入 HBase另一表中某列
实现Demo如下 :
1.创建两个表 插入模板数据
/**
 * Creates the demo tables ("word" and "stat", each with a "content" column
 * family) and seeds the "word" table with five sample rows used as
 * MapReduce input.
 *
 * NOTE(review): setAutoFlushTo/flushCommits are HTable-era calls; on newer
 * HBase versions prefer BufferedMutator — kept here to match the tutorial's
 * target API version.
 */
public class HbaseMR {
    private static Configuration conf;
    private static Connection conn;

    static {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        try {
            conn = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            // Everything below depends on conn; fail fast instead of leaving
            // conn == null and getting an obscure NPE later.
            throw new ExceptionInInitializerError(e);
        }
    }

    /** Builds one Put for the content:info column of the given row key. */
    private static Put buildPut(String rowKey, String text) {
        Put p = new Put(Bytes.toBytes(rowKey));
        p.add("content".getBytes(), "info".getBytes(), text.getBytes());
        return p;
    }

    public static void initTable() {
        Admin admin = null;
        Table table = null;
        try {
            // Create the source table ("word") and the sink table ("stat"),
            // both with a single column family "content".
            admin = conn.getAdmin();
            HTableDescriptor word = new HTableDescriptor(TableName.valueOf("word"));
            HTableDescriptor stat = new HTableDescriptor(TableName.valueOf("stat"));
            HColumnDescriptor content = new HColumnDescriptor("content");
            word.addFamily(content);
            stat.addFamily(content);
            admin.createTable(word);
            admin.createTable(stat);

            // Seed the "word" table; buffer writes client-side and flush once.
            table = conn.getTable(TableName.valueOf("word"));
            table.setAutoFlushTo(false);
            table.setWriteBufferSize(5);
            List<Put> lp = new ArrayList<Put>();
            lp.add(buildPut("1", "The Apache Hadoop software library is a framework"));
            lp.add(buildPut("2", "The common utilities that support the other Hadoop modules"));
            lp.add(buildPut("3", "Hadoop by reading the documentation"));
            lp.add(buildPut("4", "Hadoop from the release page"));
            lp.add(buildPut("5", "Hadoop on the mailing list"));
            table.put(lp);
            table.flushCommits();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Admin and Table hold client resources and must be closed.
            if (table != null) {
                try {
                    table.close();
                } catch (IOException ignored) {
                    // best-effort cleanup; nothing useful to do here
                }
            }
            if (admin != null) {
                try {
                    admin.close();
                } catch (IOException ignored) {
                    // best-effort cleanup; nothing useful to do here
                }
            }
        }
    }
}
2. Mapper 类要继承 TableMapper
/**
 * Reads each row of the source HBase table, extracts the content:info cell,
 * splits it on single spaces, and emits (word, 1) pairs.
 */
public class HbaseMapper extends TableMapper<Text, IntWritable> {

    // Reuse writables across map() calls — standard Hadoop idiom to cut
    // per-record allocations.
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
            throws IOException, InterruptedException {
        // The row may legitimately lack the content:info cell; skip it
        // instead of crashing the task with an NPE.
        byte[] raw = value.getValue(Bytes.toBytes("content"), Bytes.toBytes("info"));
        if (raw == null) {
            return;
        }
        // Bytes.toString decodes as UTF-8; new String(byte[]) would depend on
        // the platform default charset.
        String line = Bytes.toString(raw);
        for (String s : line.split(" ")) {
            word.set(s);
            context.write(word, ONE);
        }
    }
}
3. Reducer 类要继承 TableReducer
/**
 * Sums the counts for each word and writes the total into the sink table's
 * content:info column, using the word itself as the row key.
 */
public class HbaseReduce extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            // Read the primitive directly; Integer.parseInt(value.toString())
            // is a needless String round-trip.
            sum += value.get();
        }
        byte[] row = Bytes.toBytes(key.toString());
        Put put = new Put(row);
        put.add(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes(String.valueOf(sum)));
        context.write(new ImmutableBytesWritable(row), put);
    }
}
4.重点:Driver类
/**
 * Driver: wires the HBase source table "word" through HbaseMapper/HbaseReduce
 * into the HBase sink table "stat".
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
    Job job = Job.getInstance(conf);
    job.setJarByClass(HbaseDriver.class);
    // Mapper side: source table name, scan, mapper class, map output key/value, job.
    TableMapReduceUtil.initTableMapperJob("word", new Scan(), HbaseMapper.class, Text.class, IntWritable.class, job);
    // Reducer side: sink table name, reducer class, job.
    // (Fixed: original had a "jobTableMapReduceUtil..." typo that does not compile.)
    TableMapReduceUtil.initTableReducerJob("stat", HbaseReduce.class, job);
    // Propagate job failure to the shell instead of always exiting 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}