Storm Integration with HBase

Use Apache HBase™ when you need random, real-time read/write access to big data. The project's goal is the hosting of very large tables (billions of rows by millions of columns) on clusters of commodity hardware. Apache HBase is an open-source, distributed, versioned, non-relational database modeled after Google's Bigtable: A Distributed Storage System for Structured Data, by Chang et al. Just as Bigtable leverages the distributed data storage provided by the Google File System, Apache HBase provides Bigtable-like capabilities on top of Hadoop and HDFS.

9. Quick HBase environment setup
Extract the installation package:
tar -zxvf hbase-1.2.0-cdh5.7.0.tar.gz -C ~/app/
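Optionally, add HBase to your PATH so the hbase commands below can be run from anywhere (the path here follows from the extraction step above):
export HBASE_HOME=~/app/hbase-1.2.0-cdh5.7.0
export PATH=$HBASE_HOME/bin:$PATH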

Edit the configuration file hbase-env.sh (point JAVA_HOME at your JDK; HBASE_MANAGES_ZK=false tells HBase to use an external ZooKeeper instead of managing its own):
export JAVA_HOME=/home/storm/app/jdk1.8.0_91
export HBASE_MANAGES_ZK=false

Edit the configuration file hbase-site.xml:

<configuration>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://storm000:8020/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>storm000:2181</value>
  </property>
</configuration>

Start the external ZooKeeper:
./zkServer.sh start
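You can confirm ZooKeeper is up before continuing:
./zkServer.sh status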

Start HBase:
./start-hbase.sh
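Once started, the HBase daemons can be checked with jps; on a single-node setup like this one the output should include at least the HBase master (and typically a region server as well):
jps
# output should include HMaster (and typically HRegionServer)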

Use HBase from the shell:
hbase shell
create 'test', 'cf'      create a table with column family cf
list                     list all tables
scan 'test'              view the table's data
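As a quick sanity check (the row key and value below are arbitrary examples), write a cell and read it back from inside the hbase shell:
put 'test', 'row1', 'cf:a', 'value1'      write one cell into column cf:a
get 'test', 'row1'                        read the row back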

10. Storm-HBase integration development
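The code in this section relies on the storm-hbase integration module. A minimal sketch of the Maven dependency, assuming your build matches the Storm version on the cluster (the version number below is illustrative):

<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-hbase</artifactId>
    <version>1.1.1</version>
</dependency>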
package com.imooc.bigdata.integration.hbase;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.hbase.bolt.HBaseBolt;
import org.apache.storm.hbase.bolt.mapper.SimpleHBaseMapper;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.util.HashMap;
import java.util.Map;
import java.util.Random;

/**
 * Word-count topology built with Storm, writing the results to HBase.
 */
public class LocalWordCountHbaseStormTopology {

    /**
     * Spout that produces the input data.
     */
    public static class DataSoureceSpout extends BaseRichSpout {

        private SpoutOutputCollector collector;

        @Override
        public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.collector = spoutOutputCollector;
        }

        // Candidate words to emit
        public static final String[] words = new String[]{"xiaoqi", "xiaoliu", "xiaowu", "xiaosi", "xiaosan", "xiaoer", "xiaoyi"};

        @Override
        public void nextTuple() {
            // Initialize the random generator
            Random random = new Random();
            // Pick a random word from the words array
            String word = words[random.nextInt(words.length)];
            // Emit the chosen word
            this.collector.emit(new Values(word));
            // Log what was emitted
            System.out.println("emit: " + word);
            // Sleep briefly after each emit
            Utils.sleep(1000);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("line"));
        }
    }


    /**
     * Bolt that splits the incoming data; here each "line" is already a
     * single word, so it is simply re-emitted under the field "word".
     */
    public static class SplitBolt extends BaseRichBolt {

        // Collector used to emit tuples downstream
        private OutputCollector collector;

        @Override
        public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
            this.collector = outputCollector;
        }

        /**
         * Business logic: split the incoming data.
         * @param tuple the input tuple carrying the "line" field
         */
        @Override
        public void execute(Tuple tuple) {
            String word = tuple.getStringByField("line");
            this.collector.emit(new Values(word));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("word"));
        }
    }


    /**
     * Bolt that aggregates the word counts.
     */
    public static class CountBolt extends BaseRichBolt {

        private OutputCollector collector;

        @Override
        public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
            this.collector = outputCollector;
        }

        // In-memory map holding the running count for each word
        Map<String, Integer> map = new HashMap<String, Integer>();

        /**
         * Business logic:
         * 1. Get each word.
         * 2. Update the running total for that word.
         * 3. Emit the result.
         * @param tuple the input tuple carrying the "word" field
         */
        @Override
        public void execute(Tuple tuple) {
            // 1. Get the word
            String word = tuple.getStringByField("word");
            // 2. If the word has not been seen yet, start from 0; then increment
            Integer count = map.get(word);
            if (count == null) {
                count = 0;
            }
            count++;

            // Store the updated total
            map.put(word, count);

            // 3. Emit the word together with its running count
            this.collector.emit(new Values(word, map.get(word)));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("word", "count"));
        }
    }

    public static void main(String[] args) {
        // HBase connection settings, passed to the HBaseBolt via the topology config
        Config config = new Config();
        Map<String, Object> hbaseConf = new HashMap<String, Object>();
        hbaseConf.put("hbase.rootdir", "hdfs://storm000:8020/hbase");
        hbaseConf.put("hbase.zookeeper.quorum", "storm000:2181");
        config.put("hbase.conf", hbaseConf);

        // Build the topology from the Spout and Bolts with TopologyBuilder
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("DataSoureceSpout", new DataSoureceSpout());
        builder.setBolt("SplitBolt", new SplitBolt()).shuffleGrouping("DataSoureceSpout");
        // Note: with parallelism greater than 1, this should be
        // fieldsGrouping("SplitBolt", new Fields("word")) so that all tuples
        // for the same word reach the same CountBolt task
        builder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("SplitBolt");

        // Map tuples to HBase: row key = word, regular column cf:word,
        // and cf:count maintained as an HBase counter column
        SimpleHBaseMapper mapper = new SimpleHBaseMapper()
                .withRowKeyField("word")
                .withColumnFields(new Fields("word"))
                .withCounterFields(new Fields("count"))
                .withColumnFamily("cf");

        // HBase integration: write each (word, count) tuple into the "wc" table
        HBaseBolt hbaseBolt = new HBaseBolt("wc", mapper).withConfigKey("hbase.conf");
        builder.setBolt("HBaseBolt", hbaseBolt).shuffleGrouping("CountBolt");

        // Run the topology on a local in-process cluster
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("LocalWordCountHbaseStormTopology", config, builder.createTopology());
    }
}
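HBaseBolt writes into an existing table, so before submitting the topology create the target table in the hbase shell; the table name and column family must match the HBaseBolt and SimpleHBaseMapper settings above. While the topology runs, the results can be inspected with scan, and an individual counter read back with get_counter:

create 'wc', 'cf'
scan 'wc'
get_counter 'wc', 'xiaoyi', 'cf:count'

One caveat: withCounterFields applies the count field as an HBase counter increment, while CountBolt emits a running total per word, so the stored counter can grow faster than the true count; emitting a fixed 1 per occurrence is a common adjustment.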
