HBase (5): MapReduce

HBase can also act as a source and sink for MapReduce jobs.

HBase MapReduce jobs boil down to three patterns:

  1. Data in HDFS is loaded into a column of an HBase table (see the sketch after this list)
  2. A column of an HBase table is exported to HDFS
  3. A column of one HBase table is processed and written into a column of another HBase table
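The demo below covers pattern 3 only, so here is a minimal, hedged sketch of pattern 1: a map-only job that reads text lines from HDFS and writes them into the word table. The input path /input/words and the rowkey<TAB>text line format are illustrative assumptions, not part of the original post.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class HdfsToHbase {

    // Assumed line format: rowkey<TAB>text
    static class ImportMapper extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] parts = value.toString().split("\t", 2);
            if (parts.length < 2) return; // skip malformed lines
            Put put = new Put(Bytes.toBytes(parts[0]));
            put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes(parts[1]));
            context.write(new ImmutableBytesWritable(Bytes.toBytes(parts[0])), put);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        Job job = Job.getInstance(conf);
        job.setJarByClass(HdfsToHbase.class);
        job.setMapperClass(ImportMapper.class);
        FileInputFormat.addInputPath(job, new Path("/input/words")); // assumed input path
        // A null reducer class still wires up TableOutputFormat as the sink,
        // so this map-only job writes its Puts straight into the "word" table
        TableMapReduceUtil.initTableReducerJob("word", null, job);
        job.setNumReduceTasks(0);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}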

The pattern 3 demo follows:

1. Create the two tables and insert sample data

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;

public class HbaseMR {

    private static Configuration conf;
    private static Connection conn;
    private static Admin admin;

    static {
        conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");
        try {
            conn = ConnectionFactory.createConnection(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void initTable() {
        try {
            // Create the source table "word" and the sink table "stat",
            // each with a single column family "content"
            admin = conn.getAdmin();
            HTableDescriptor word = new HTableDescriptor(TableName.valueOf("word"));
            HTableDescriptor stat = new HTableDescriptor(TableName.valueOf("stat"));
            HColumnDescriptor content = new HColumnDescriptor("content");
            word.addFamily(content);
            stat.addFamily(content);
            admin.createTable(word);
            admin.createTable(stat);
            // Seed the source table; put(List<Put>) sends all five rows in one batch
            Table table = conn.getTable(TableName.valueOf("word"));
            List<Put> lp = new ArrayList<Put>();
            Put p1 = new Put(Bytes.toBytes("1"));
            p1.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes("The Apache Hadoop software library is a framework"));
            lp.add(p1);
            Put p2 = new Put(Bytes.toBytes("2"));
            p2.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes("The common utilities that support the other Hadoop modules"));
            lp.add(p2);
            Put p3 = new Put(Bytes.toBytes("3"));
            p3.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes("Hadoop by reading the documentation"));
            lp.add(p3);
            Put p4 = new Put(Bytes.toBytes("4"));
            p4.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes("Hadoop from the release page"));
            lp.add(p4);
            Put p5 = new Put(Bytes.toBytes("5"));
            p5.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes("Hadoop on the mailing list"));
            lp.add(p5);
            table.put(lp);
            table.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
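The original code called setAutoFlushTo(false), setWriteBufferSize(5) and flushCommits(); those methods belong to the old HTable class, not to the Table interface returned by Connection.getTable(), so they were dropped above. If client-side write buffering is genuinely wanted, a minimal sketch with BufferedMutator (the current buffered-write API) could replace the table.put(lp) call; the 1 MB buffer size is an arbitrary example:

// Hedged sketch: BufferedMutator is the Table-API replacement for the old
// setAutoFlushTo(false)/flushCommits() pattern. Uses conn and lp from above;
// needs org.apache.hadoop.hbase.client.BufferedMutator and BufferedMutatorParams.
BufferedMutatorParams params = new BufferedMutatorParams(TableName.valueOf("word"))
        .writeBufferSize(1024 * 1024); // 1 MB buffer, an example value
try (BufferedMutator mutator = conn.getBufferedMutator(params)) {
    mutator.mutate(lp);  // Puts queue up in the client-side buffer
    mutator.flush();     // push everything buffered to the region servers
}                        // close() would also flush any remaining edits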

2. The Mapper must extend TableMapper

TableMapper<KEYOUT, VALUEOUT> fixes the map input types for you: the key is the row key (ImmutableBytesWritable) and the value is the full row (Result).

import java.io.IOException;

import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class HbaseMapper extends TableMapper<Text, IntWritable> {

    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
        // Pull the content:info cell out of the current row and split it into words
        byte[] l = value.getValue(Bytes.toBytes("content"), Bytes.toBytes("info"));
        String line = new String(l);
        String[] split = line.split(" ");
        for (String s : split) {
            context.write(new Text(s), new IntWritable(1));
        }
    }
}

3. The Reducer must extend TableReducer

TableReducer<KEYIN, VALUEIN, KEYOUT> leaves the output value fixed as a Mutation (here a Put), which TableOutputFormat writes into the target table.

import java.io.IOException;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

public class HbaseReduce extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum the 1s the mapper emitted for this word
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // The word becomes the row key; the count lands in content:info of the "stat" table
        Put put = new Put(Bytes.toBytes(key.toString()));
        put.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes(String.valueOf(sum)));
        context.write(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), put);
    }
}

4. The key piece: the Driver class


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class HbaseDriver {

    public static void main(String[] args) throws Exception {

        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "hadoop01:2181,hadoop02:2181,hadoop03:2181");

        Job job = Job.getInstance(conf);
        job.setJarByClass(HbaseDriver.class);

        // Wire up the map side: source table, scan, mapper class,
        // map output key class, map output value class, job
        TableMapReduceUtil.initTableMapperJob("word", new Scan(), HbaseMapper.class, Text.class, IntWritable.class, job);

        // Wire up the reduce side: sink table, reducer class, job
        TableMapReduceUtil.initTableReducerJob("stat", HbaseReduce.class, job);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
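The driver passes a bare new Scan() to initTableMapperJob, which is fine for this toy table. For larger tables, a hedged tweak worth considering is to narrow and tune the scan before handing it over; the caching value of 500 below is just an example, not from the original post:

// Drop-in replacement for the initTableMapperJob call above, with a tuned Scan.
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("content"), Bytes.toBytes("info")); // ship only the column the mapper reads
scan.setCaching(500);        // more rows per RPC than the default; 500 is an example value
scan.setCacheBlocks(false);  // recommended for MR full scans so the block cache isn't churned
TableMapReduceUtil.initTableMapperJob("word", scan, HbaseMapper.class, Text.class, IntWritable.class, job);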