Storm Integration Test: HBase, JDBC, Kafka, and Hive

This article walks through using Storm with Kafka for real-time processing and writing the results to Hive, HBase, MySQL, and HDFS. The code also shows the Storm/Hive integration and the pitfalls it involves, giving a complete picture of a real-time big-data processing and storage pipeline.
/**
 * Storm integration with Kafka, Hive, JDBC, HBase and HDFS.
 * Created by sker on 17-11-13
 * Kafka feeds Storm; the data goes to JobBolt for Chinese word segmentation,
 * and the results are fanned out to separate bolts that write to Hive, HBase, MySQL and HDFS.
 */
public class Test {
 
    public static void main(String[] args) {
 
        // Create a TopologyBuilder, a local cluster for testing, and the topology Config
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        LocalCluster localCluster = new LocalCluster();
        Config config = new Config();
 
        /**
         * Kafka-to-Storm wiring
         */

        // Kafka/Storm integration needs a ZkHosts instance and a SpoutConfig
        // (ZooKeeper address, topic, ZooKeeper root path, spout id)
        ZkHosts zkHosts = new ZkHosts("localhost:2181");
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, "hbase", "/storm", "kafka");
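        // By default the KafkaSpout emits raw bytes. In most setups a scheme is also attached so
        // downstream bolts receive each Kafka message as a UTF-8 string. A minimal sketch, assuming
        // storm-kafka's StringScheme and SchemeAsMultiScheme are on the classpath:
        spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());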
 
        /**
         * Storm/HDFS integration
         */

        // Storm/HDFS integration needs an HdfsBolt configured with the appropriate parameters
        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://localhost:9000/")                                    // HDFS URL
                .withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter(","))  // field delimiter
                .withSyncPolicy(new CountSyncPolicy(10))                                // sync to HDFS every 10 tuples
                .withFileNameFormat(new DefaultFileNameFormat().withPath("/test"))      // file name format; the output path is set here
                .withRotationPolicy(new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.KB)); // roll to a new file every 1 KB
 
        /**
         * Storm/HBase integration
         */

        // Pass the HBase connection settings to the bolt through the topology Config
        Map<String, Object> hbConf = new HashMap<String, Object>();
        hbConf.put("hbase.rootdir", "hdfs://localhost:9000/sbsbsbs/hbase/");
        hbConf.put("hbase.zookeeper.quorum", "localhost:2181");
        config.put("hbase.conf", hbConf);
 
        // Use "word" as the row key and write the "word"/"count" fields into column family "cf"
        SimpleHBaseMapper simpleHBaseMapper = new SimpleHBaseMapper()
                .withColumnFamily("cf")
                .withColumnFields(new Fields("word", "count"))
                .withRowKeyField("word");

        // Write into the HBase table "demo", reading the connection settings from config key "hbase.conf"
        HBaseBolt hBaseBolt = new HBaseBolt("demo", simpleHBaseMapper)
                .withConfigKey("hbase.conf");
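        // Alternative (not wired into this topology): for a running word count, SimpleHBaseMapper can
        // declare "count" as a counter field so HBase increments the value instead of overwriting it.
        // A sketch, assuming storm-hbase's withCounterFields API:
        SimpleHBaseMapper counterMapper = new SimpleHBaseMapper()
                .withColumnFamily("cf")
                .withCounterFields(new Fields("count"))
                .withRowKeyField("word");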
 
 
        /**
         * Storm/JDBC integration
         */
        Map<String, Object> hikariConfigMap = Maps.newHashMap();
        hikariConfigMap.put("dataSourceClassName", "com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
        hikariConfigMap.put("dataSource.url", "jdbc:mysql://localhost/test?useUnicode=true&characterEncoding=utf-8");
        hikariConfigMap.put("dataSource.user", "root");
        hikariConfigMap.put("dataSource.password", "1327");

        ConnectionProvider connectionProvider = new HikariCPConnectionProvider(hikariConfigMap);

        String tableName = "seg";
        JdbcMapper simpleJdbcMapper = new SimpleJdbcMapper(tableName, connectionProvider);

        // Insert each (word, count) tuple into the MySQL table "seg"
        JdbcInsertBolt insertBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
//                .withTableName("seg")     // not needed when withInsertQuery is used
                .withInsertQuery("insert into seg values (?,?)")
                .withQueryTimeoutSecs(30);

        // Runs an aggregate query against "seg" (note: JdbcInsertBolt is pressed into service for a SELECT here)
        JdbcInsertBolt selectBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
                .withInsertQuery("select word,count(word) from seg group by word")
                .withQueryTimeoutSecs(30);
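        // storm-jdbc's intended component for SELECT queries is JdbcLookupBolt rather than
        // JdbcInsertBolt. A minimal sketch (not wired into this topology), assuming the
        // JdbcLookupBolt/SimpleJdbcLookupMapper API from storm-jdbc:
        JdbcLookupMapper lookupMapper = new SimpleJdbcLookupMapper(
                new Fields("word", "total"),      // fields emitted for each result row
                Lists.<Column>newArrayList());    // the query has no "?" placeholders, so no bound columns
        JdbcLookupBolt lookupBolt = new JdbcLookupBolt(connectionProvider,
                "select word, count(word) from seg group by word", lookupMapper)
                .withQueryTimeoutSecs(30);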
 
        /**
         * Build the topology
         */
        // Kafka-to-Storm spout: the first step of the topology
        topologyBuilder.setSpout("kafka", new KafkaSpout(spoutConfig));
        // JobBolt performs Chinese word segmentation on the incoming data
        topologyBuilder.setBolt("document", new JobBolt.GetDocument()).shuffleGrouping("kafka");
        topologyBuilder.setBolt("wordCount", new JobBolt.StringToWordCount()).shuffleGrouping("document");
        // Insert the word counts into MySQL
        topologyBuilder.setBolt("jdbc_insert", insertBolt).shuffleGrouping("wordCount");
        // Query MySQL
        topologyBuilder.setBolt("jdbc_select", selectBolt).shuffleGrouping("jdbc_insert");
        // Write the results to HDFS
        topologyBuilder.setBolt("hdfs", hdfsBolt).shuffleGrouping("jdbc_select");
        // Write the word counts to HBase
        topologyBuilder.setBolt("hbase", hBaseBolt).shuffleGrouping("wordCount");


        localCluster.submitTopology("SegGoGo", config, topologyBuilder.createTopology());
    }
}
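The JobBolt referenced above (GetDocument / StringToWordCount) lives in the word_segmentation package of the GitHub repository and is not reproduced here. As a rough idea of the contract the downstream JDBC/HBase/HDFS bolts rely on, a hypothetical word-count bolt emitting ("word", "count") tuples could look like the sketch below; it is illustrative only and assumes the upstream bolt emits one segmented token per tuple in a field named "word", which may differ from the author's implementation.

class StringToWordCountSketch extends BaseRichBolt {
    private OutputCollector collector;
    private Map<String, Integer> counts;

    public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.counts = new HashMap<String, Integer>();
    }

    public void execute(Tuple tuple) {
        // One segmented token per incoming tuple (assumption for this sketch)
        String word = tuple.getStringByField("word");
        Integer count = counts.get(word);
        count = (count == null) ? 1 : count + 1;
        counts.put(word, count);
        // Emit the running count so the JDBC, HBase and HDFS bolts can persist it
        collector.emit(new Values(word, count));
        collector.ack(tuple);
    }

    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("word", "count"));
    }
}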

Storm/Hive integration

/**
 * Storm/Hive integration is rather troublesome and does not fit into the joint test in the
 * word_segmentation package. Since Storm/HDFS integration is simple, an alternative is to
 * write with storm-hdfs and then LOAD the files into a Hive table.
 *
 * Integrating Storm with Hive requires changes to the Hive configuration, including enabling
 * automatic partitioning, setting the metastore URIs, configuring JDBC, and enabling hive.in.test
 * (see hive-site.xml in the same package).
 * Make sure the Hive version in your environment matches the jar versions used by the code,
 * and that both the metastore service and HiveServer2 are running.
 *
 * Hive DDL used for this test:
 * create table demo (id int,name string,sex string) partitioned by (age int) clustered by (id) into 3 buckets stored as orc tblproperties ("orc.compress"="NONE",'transactional'='true');
 *
 * Storm/Hive integration really is a pain: one small slip and it fails, and debugging is even more
 * painful. If you are interested, test it yourself. Good luck, haha.
 */
public class StormHiveTest {
    static class Storm_Hive_Spout extends BaseRichSpout {
        SpoutOutputCollector spoutOutputCollector;
        String[] name = {"aa","bb","cc","dd","ee","ff","gg","hh"};
        String[] sex = {"man","woman"};
        int[] id = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
 
        Random random = new Random();
 
        public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.spoutOutputCollector=spoutOutputCollector;
        }
 
        public void nextTuple() {
            Utils.sleep(1000);

            // Emit a random (id, name, sex) record with a fixed partition value of "18" for age
            String s = name[random.nextInt(name.length)];
            String sex1 = sex[random.nextInt(sex.length)];
            int id1 = id[random.nextInt(id.length)];
            spoutOutputCollector.emit(new Values(id1, s, sex1, "18"));
            System.out.println("" + id1 + ":" + s + ":" + sex1);

        }
 
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("id","name","sex","age"));
        }
    }
 
    public static void main(String[] args) {

        // Map the fields emitted by the spout onto the Hive table columns and the partition column
        DelimitedRecordHiveMapper delimitedRecordHiveMapper = new DelimitedRecordHiveMapper()
                .withColumnFields(new Fields("id", "name", "sex"))
                .withPartitionFields(new Fields("age"));

        // Metastore URI, database, table and mapper, plus batching/transaction settings
        HiveOptions hiveOptions = new HiveOptions("thrift://localhost:9083", "default", "demo", delimitedRecordHiveMapper)
                .withTxnsPerBatch(10)
                .withBatchSize(20)
                .withIdleTimeout(10);

        HiveBolt hiveBolt = new HiveBolt(hiveOptions);

        TopologyBuilder topologyBuilder = new TopologyBuilder();
        topologyBuilder.setSpout("spout", new Storm_Hive_Spout());
        topologyBuilder.setBolt("bolt", hiveBolt).shuffleGrouping("spout");

        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("go", new Config(), topologyBuilder.createTopology());
    }
}

Full source code is on GitHub: https://github.com/ZzzzZzreal/StormGoGo/tree/master/src/main/java

Storm/Hive integration: the storm_hive package

Storm with HBase, JDBC, Kafka and HDFS: the word_segmentation package
