JStorm usage demo

A JStorm example built around the fieldsGrouping stream grouping. Fields grouping guarantees that tuples carrying the same values in the specified fields are dispatched to the same task. For example, suppose a tuple has two fields, field1 and field2, and field1 is declared as the grouping field. Given two tuples, tuple1 (field1 = a, field2 = b) and tuple2 (field1 = a, field2 = c), both share field1 = a, so tuple1 and tuple2 will be processed by the same task (thread).
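
Under the hood, fields grouping picks the target task from a hash of the grouping-field values alone, which is why equal values always land on the same task. A minimal illustrative sketch of the idea (this is not JStorm's actual routing code; the class and method names are made up):

import java.util.Arrays;
import java.util.List;

public class FieldsGroupingSketch {

    // Illustrative only: choose a task by hashing the grouping-field values.
    // Tuples whose grouping values are equal always map to the same task.
    static int chooseTask(List<Object> groupingValues, int numTasks) {
        return Math.abs(groupingValues.hashCode() % numTasks);
    }

    public static void main(String[] args) {
        int numTasks = 3;
        // tuple1 (field1=a, field2=b) and tuple2 (field1=a, field2=c):
        // only field1 is the grouping field, so both choose the same task.
        System.out.println(chooseTask(Arrays.<Object>asList("a"), numTasks));
        System.out.println(chooseTask(Arrays.<Object>asList("a"), numTasks));
    }
}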

1. Create the spout data source; in this example it continuously pulls data from Redis.

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import com.demo.config.SpringBeans;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.data.redis.core.RedisTemplate;

import java.util.Map;
import java.util.UUID;

public class MySpout extends BaseRichSpout {

    private SpoutOutputCollector collector;

    private String componentId;

    private int taskId;

    private RedisTemplate<String,Object> redisTemplate;

    private static final Logger logger = LoggerFactory.getLogger(MySpout.class);

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        componentId = context.getThisComponentId();
        this.taskId = context.getThisTaskId();

        redisTemplate = SpringBeans.getBean("redisTemplate",RedisTemplate.class);
    }

    public void nextTuple() {
        try {
            Object value = redisTemplate.opsForList().leftPop("student");
            logger.info("MySpout nextTuple componentId:{} taskId:{} value:{}", componentId, taskId, value);
            // value e.g. {"name":"zhangsan","address":"hangzhou"}
            if (null == value) {
                // Nothing queued; back off briefly instead of busy-spinning.
                Thread.sleep(1000);
                return;
            }
            // Emit with a unique message ID so the ackers can track this tuple's tree.
            String uuid = UUID.randomUUID().toString();
            collector.emit(new Values(value), uuid);
        } catch (Exception e) {
            logger.error("MySpout nextTuple error", e);
        }
    }

    // Declare the fields of the output stream
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("student"));
    }
}
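
The spout resolves its RedisTemplate through a SpringBeans helper in com.demo.config that this demo does not show. A minimal sketch of such a holder, assuming a standard Spring ApplicationContextAware component:

import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;

@Component
public class SpringBeans implements ApplicationContextAware {

    private static ApplicationContext context;

    @Override
    public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
        context = applicationContext;
    }

    // Look up a bean by name and type, e.g. getBean("redisTemplate", RedisTemplate.class).
    public static <T> T getBean(String name, Class<T> clazz) {
        return context.getBean(name, clazz);
    }
}

Whatever the real helper looks like, it must be initialized in every JVM that runs the spout; that holds here because SpringApplication.run and the LocalCluster below share one process.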

 

2. Create the first bolt.

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.IRichBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;

public class FirstBolt implements IRichBolt {
    private Logger logger = LoggerFactory.getLogger(FirstBolt.class);

    private OutputCollector collector;

    private Integer taskId;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        this.taskId = context.getThisTaskId();
    }

    public void execute(Tuple input) {
        try {
            Map<String, String> student = (Map<String, String>) input.getValue(0);
            // student e.g. {"name":"zhangsan","address":"hangzhou"}
            String address = student.get("address");
            // address = "hangzhou"
            collector.emit(input, new Values(address, student));
            // Anchoring the new tuple to input (the parent tuple) links them into one
            // tuple tree, so acks on downstream child tuples propagate back to the spout.
            logger.info("FirstBolt execute taskId:{} value:{}", taskId, student);
            collector.ack(input);
            // IRichBolt does not ack automatically; without this call the tuple
            // would time out and be replayed from the spout.
        } catch (Exception e) {
            logger.error("FirstBolt execute error", e);
            collector.fail(input);
        }
    }

    public void cleanup() {

    }

    // Declare the fields of the output stream
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("address", "student"));
        // "address" is the grouping field; the Fields declared here correspond,
        // position by position, to the Values emitted in execute above.
    }

    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
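
The anchoring in execute above matters for reliability. An anchored emit attaches the new tuple to the input tuple's tree, so acks from downstream propagate back to the spout; an unanchored emit starts a fresh tree and downstream failures become invisible to the spout:

// Anchored: the child tuple joins input's tuple tree.
collector.emit(input, new Values(address, student));

// Unanchored: a brand-new tree; the spout never hears about this tuple's fate.
collector.emit(new Values(address, student));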

3. Create the second bolt.

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Map;

public class SecondBolt extends BaseRichBolt {

    private Logger logger = LoggerFactory.getLogger(SecondBolt.class);

    private OutputCollector outputCollector;

    private Integer taskId;
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.outputCollector = collector;
        this.taskId = context.getThisTaskId();
    }

    @Override
    public void execute(Tuple input) {
        try {
            String address = input.getString(0);
            // address = "hangzhou"
            Map<String, String> student = (Map<String, String>) input.getValue(1);
            // student e.g. {"name":"zhangsan","address":"hangzhou"}
            logger.info("SecondBolt execute taskId:{} address:{} student:{}", taskId, address, student);
            outputCollector.ack(input);
            // BaseRichBolt implements IRichBolt and does NOT ack automatically;
            // only IBasicBolt/BaseBasicBolt acks for you. Without this call the
            // tuple tree never completes and the spout replays it after the timeout.
        } catch (Exception e) {
            logger.error("SecondBolt execute error", e);
            outputCollector.fail(input);
        }
    }

    @Override
    public void cleanup() {

    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: no output stream to declare.
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}
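
If manual acking is unwanted, the same logic can extend BaseBasicBolt instead: its executor acks automatically after execute returns and fails the tuple when a FailedException is thrown. A minimal sketch (SecondBasicBolt is a hypothetical name):

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple;

import java.util.Map;

public class SecondBasicBolt extends BaseBasicBolt {

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        String address = input.getString(0);
        Map<String, String> student = (Map<String, String>) input.getValue(1);
        // No explicit ack: the framework acks once execute returns normally
        // and fails the tuple if a FailedException propagates out.
        System.out.println(address + " " + student);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // Terminal bolt: no output stream to declare.
    }
}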

4. Build the topology.

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.tuple.Fields;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration;

import java.util.HashMap;
import java.util.Map;

@SpringBootApplication(exclude = {JdbcTemplateAutoConfiguration.class},
scanBasePackages = "com.demo")
public class MyTopology {
    public static void main(String[] args) {
        SpringApplication.run(MyTopology.class,args);

        Map conf = new HashMap();
        // All custom topology configuration goes into this Map.

        TopologyBuilder builder = new TopologyBuilder();
        // Builder used to assemble the topology.

        int spoutParal = 1;
        // Spout parallelism.

        builder.setSpout("my-spout", new MySpout(), spoutParal);
        // Create the spout: new MySpout() is the actual spout instance and
        // "my-spout" its component id; the id must not contain spaces.

        int boltParal = 3;
        // Bolt parallelism.

        builder.setBolt("first-bolt", new FirstBolt(), boltParal)
                .localOrShuffleGrouping("my-spout");
        // Create the first bolt: "first-bolt" is its id, new FirstBolt() the instance,
        // and boltParal its parallelism. localOrShuffleGrouping("my-spout") subscribes
        // to "my-spout": tuples go to a bolt task in the same worker when one exists,
        // otherwise they are shuffled (randomly distributed) across the bolt's tasks.


        builder.setBolt("second-bolt",new SecondBolt(),boltParal)
                .fieldsGrouping("first-bolt",new Fields("address"));

        // Create the second bolt: "second-bolt" is its id, new SecondBolt() the
        // instance, and boltParal its parallelism.
        // fieldsGrouping("first-bolt", new Fields("address")) subscribes to
        // "first-bolt" and partitions tuples by the "address" field, so tuples with
        // the same address always reach the same SecondBolt task.

        int ackerParal = 1;
        Config.setNumAckers(conf, ackerParal);
        // Acker parallelism (number of acker tasks).

        int workerNum = 1;
        conf.put(Config.TOPOLOGY_WORKERS, workerNum);
        // Number of worker processes the whole topology will use.

        conf.put(Config.STORM_CLUSTER_MODE, "distributed");
        // Mark the topology as distributed so it can be deployed to a JStorm cluster.

        // Submit the topology.
        try {
            // To run on a real JStorm cluster, use StormSubmitter instead:
            // StormSubmitter.submitTopology("first-topology", conf,
            //         builder.createTopology());

            // Local mode, for development and testing.
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("first-topology", conf,
                    builder.createTopology());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
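
To see data flow end to end, something must push records onto the "student" list that MySpout pops. A minimal seeding sketch using the same RedisTemplate bean (StudentSeeder is a hypothetical helper, and it assumes the configured serializer round-trips a Map, since the bolts cast the payload to Map<String, String>):

import com.demo.config.SpringBeans;
import org.springframework.data.redis.core.RedisTemplate;

import java.util.HashMap;
import java.util.Map;

public class StudentSeeder {

    // Call this after the Spring context is up (e.g. from MyTopology.main,
    // right after SpringApplication.run), so SpringBeans can resolve the bean.
    public static void seedOneStudent() {
        RedisTemplate<String, Object> redisTemplate =
                SpringBeans.getBean("redisTemplate", RedisTemplate.class);

        Map<String, String> student = new HashMap<String, String>();
        student.put("name", "zhangsan");
        student.put("address", "hangzhou");

        // MySpout leftPops from "student", so rightPush keeps FIFO order.
        redisTemplate.opsForList().rightPush("student", student);
    }
}

Once a record is queued, the logs should show MySpout emitting it, FirstBolt extracting the address, and every tuple with the same address arriving at one and the same SecondBolt task.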

 
