当您需要对大数据进行随机、实时的读/写访问时,请使用Apache HBase™。该项目的目标是在商用硬件集群上托管超大型表——数十亿行 X 数百万列。Apache HBase是一个开源的、分布式的、版本化的、非关系型数据库,其仿照Chang等人的Google Bigtable论文《Bigtable:结构化数据的分布式存储系统》。正如Bigtable利用Google文件系统提供的分布式数据存储一样,Apache HBase在Hadoop和HDFS之上提供类似于Bigtable的功能。
9、Hbase环境快速搭建
安装解压:
tar -zxvf hbase-1.2.0-cdh5.7.0.tar.gz -C ~/app/
修改配置文件:hbase-env.sh
export JAVA_HOME=/home/storm/app/jdk1.8.0_91
export HBASE_MANAGES_ZK=false
修改配置文件:hbase-site.xml
hbase.rootdir
hdfs://storm000:8020/hbase
启动外部zookeeper
./zkServer.sh start
启动Hbase
./start-hbase.sh
使用Hbase
hbase shell
create 'test', 'cf' 创建表
list 列出所有表
scan 'test' 查看表数据
10、Storm整合Hbase编程开发
package com.imooc.bigdata.integration.hbase;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.hbase.bolt.HBaseBolt;
import org.apache.storm.hbase.bolt.mapper.SimpleHBaseMapper;
import org.apache.storm.redis.bolt.RedisStoreBolt;
import org.apache.storm.redis.common.config.JedisPoolConfig;
import org.apache.storm.redis.common.mapper.RedisDataTypeDescription;
import org.apache.storm.redis.common.mapper.RedisStoreMapper;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.ITuple;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
/**
 * Word-count topology implemented with Storm, writing results to HBase.
 */
public class LocalWordCountHbaseStormTopology {

    /**
     * Spout that emits one random word per second on the "line" field.
     */
    public static class DataSoureceSpout extends BaseRichSpout {

        private SpoutOutputCollector collector;

        // Fixed pool of words to sample from.
        public static final String[] words =
                new String[]{"xiaoqi", "xiaoliu", "xiaowu", "xiaosi", "xiaosan", "xiaoer", "xiaoyi"};

        // Hoisted out of nextTuple(): allocating a new Random on every call
        // (once per second, forever) is wasteful; one instance suffices.
        private final Random random = new Random();

        @Override
        public void open(Map map, TopologyContext topologyContext,
                         SpoutOutputCollector spoutOutputCollector) {
            this.collector = spoutOutputCollector;
        }

        @Override
        public void nextTuple() {
            // Pick a random word from the pool and emit it.
            String word = words[random.nextInt(words.length)];
            this.collector.emit(new Values(word));
            // Log what was emitted (demo-only; a real topology would use a logger).
            System.out.println("emit: " + word);
            // Throttle: sleep one second after each emission.
            Utils.sleep(1000);
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("line"));
        }
    }

    /**
     * Bolt that forwards each incoming "line" value as a "word" tuple.
     * (The spout emits single words, so no actual splitting is needed;
     * this bolt exists to keep the classic word-count pipeline shape.)
     */
    public static class SplitBolt extends BaseRichBolt {

        private OutputCollector collector;

        @Override
        public void prepare(Map map, TopologyContext topologyContext,
                            OutputCollector outputCollector) {
            this.collector = outputCollector;
        }

        /**
         * Re-emits the incoming line as a "word" field.
         *
         * @param tuple tuple carrying a "line" string from the spout
         */
        @Override
        public void execute(Tuple tuple) {
            String word = tuple.getStringByField("line");
            this.collector.emit(new Values(word));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("word"));
        }
    }

    /**
     * Bolt that counts word occurrences and feeds the HBase counter bolt.
     */
    public static class CountBolt extends BaseRichBolt {

        private OutputCollector collector;

        // Running per-word totals, kept for in-process tracking/debugging only.
        private final Map<String, Integer> map = new HashMap<String, Integer>();

        @Override
        public void prepare(Map map, TopologyContext topologyContext,
                            OutputCollector outputCollector) {
            this.collector = outputCollector;
        }

        /**
         * 1. Extract the word.
         * 2. Update the local running total.
         * 3. Emit a counter increment for HBase.
         *
         * @param tuple tuple carrying a "word" string
         */
        @Override
        public void execute(Tuple tuple) {
            String word = tuple.getStringByField("word");
            // Maintain the local running total (0 if unseen, then +1).
            Integer count = map.get(word);
            if (count == null) {
                count = 0;
            }
            count++;
            map.put(word, count);
            // BUG FIX: downstream, main() maps "count" via withCounterFields,
            // so HBaseBolt *increments* the stored counter by the emitted value.
            // Emitting the running total (1, 2, 3, ...) would over-count
            // (1+2+3+...). Emit a delta of 1 per occurrence instead. It must be
            // a Long because SimpleHBaseMapper reads counter fields with
            // getLongByField, which throws ClassCastException on Integer.
            this.collector.emit(new Values(word, 1L));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("word", "count"));
        }
    }

    public static void main(String[] args) {
        // HBase connection settings, passed to HBaseBolt via the topology config
        // under the "hbase.conf" key.
        Config config = new Config();
        Map<String, Object> hbaseConf = new HashMap<String, Object>();
        hbaseConf.put("hbase.rootdir", "hdfs://storm000:8020/hbase");
        hbaseConf.put("hbase.zookeeper.quorum", "storm000:2181");
        config.put("hbase.conf", hbaseConf);

        // Build the topology from the Spout and Bolts:
        // DataSoureceSpout -> SplitBolt -> CountBolt -> HBaseBolt
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("DataSoureceSpout", new DataSoureceSpout());
        builder.setBolt("SplitBolt", new SplitBolt()).shuffleGrouping("DataSoureceSpout");
        builder.setBolt("CountBolt", new CountBolt()).shuffleGrouping("SplitBolt");

        // Map tuples into HBase table "wc": row key = word, column family "cf",
        // "word" written as a plain column, "count" treated as a counter column
        // (HBase increments it by the emitted value).
        SimpleHBaseMapper mapper = new SimpleHBaseMapper()
                .withRowKeyField("word")
                .withColumnFields(new Fields("word"))
                .withCounterFields(new Fields("count"))
                .withColumnFamily("cf");

        HBaseBolt hbaseBolt = new HBaseBolt("wc", mapper).withConfigKey("hbase.conf");
        builder.setBolt("HBaseBolt", hbaseBolt).shuffleGrouping("CountBolt");

        // Run on an in-process local cluster (demo; never shut down on purpose
        // so the topology keeps emitting until the JVM is killed).
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("LocalWordCountHbaseStormTopology", config, builder.createTopology());
    }
}