Storm中遇到的坑

fieldsGrouping 的并行度没有起作用

问题描述

将使用fieldsGrouping接收数据的bolt的并行度设置为3,但实际接收到数据的task只有2个。下面通过样例来重现一下问题。
代码中有一个ProduceRecordSpout用来发射字符文本("type","record"),SplitRecordBolt将ProduceRecordSpout发送的文本按空白字符进行切割,向下发送("type","word"),DistributeWordByTypeBolt接收SplitRecordBolt发送的type和word,并打印。

如代码所示,初始化3个spout,它们有不同的type(重现问题时,type分别取Type.NUMBER、Type.STRING、Type.SIGN),SplitRecordBolt的输出根据type进行fieldsGrouping,分发给DistributeWordByTypeBolt的不同task。我们在DistributeWordByTypeBolt中打印接收到每种type的taskid,发现taskid只有2种,也就是有一个task接收了两种type,并行度3(3个task)没有起作用,测试结果和代码如下:

完整代码

/**
 * Entry point of the demo: wires three spouts, a splitter bolt and a distributor bolt into one
 * topology and runs it on an in-process {@code LocalCluster}.
 *
 * <p>The distributor bolt is declared with a parallelism hint of 3 and subscribes to the splitter
 * with a fields grouping on {@code "type"}, which is the setup the surrounding article examines.
 */
public class Main {

    /** Name under which the topology is submitted. */
    private static final String TOPOLOGY_NAME = "MultiStreamsWordDistributionTopology";

    /** How long the local cluster is kept running before it is shut down (one hour). */
    private static final long RUN_MILLIS = 60L * 60 * 1000;

    /**
     * Builds the topology, submits it to a local cluster, waits, then shuts the cluster down.
     *
     * @param args command-line arguments; not used by the local-mode run
     * @throws AlreadyAliveException    declared for topology submission
     * @throws InvalidTopologyException declared for topology submission
     * @throws InterruptedException     if the waiting thread is interrupted
     */
    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, InterruptedException {

        // configure & build topology
        TopologyBuilder builder = new TopologyBuilder();

        // configure 3 spouts, one per record type
        // NOTE(review): these type values are lower-case, while the Type constants are upper-case
        // ("NUMBER", "STRING", "SIGN"), so DistributeWordByTypeBolt's switch takes its default
        // branch ("stream-discarder") for tuples produced by these spouts.
        builder.setSpout("spout-number", new ProduceRecordSpout("number", new String[] {"111 222 333", "80966 31"}), 1);
        builder.setSpout("spout-string", new ProduceRecordSpout("string", new String[]{"abc ddd fasko", "hello the word"}), 1);
        builder.setSpout("spout-sign", new ProduceRecordSpout("sign", new String[]{"++ -*% *** @@", "{+-} ^#######"}), 1);

        // configure splitter bolt: receives the records of all three spouts
        builder.setBolt("bolt-splitter", new SplitRecordBolt(), 2)
                .shuffleGrouping("spout-number")
                .shuffleGrouping("spout-string")
                .shuffleGrouping("spout-sign");

        // configure distributor bolt
        // Tuples carrying the same "type" value are sent to the same task.
        // (Alternative tried during the experiment: .shuffleGrouping("bolt-splitter"))
        builder.setBolt("bolt-distributor", new DistributeWordByTypeBolt(), 3)
                .fieldsGrouping("bolt-splitter", new Fields("type"));

        // Saver bolts are not wired in this demo; they would subscribe to the distributor's
        // named streams like this:
//        builder.setBolt("bolt-number-saver", new SaveDataBolt(Type.NUMBER), 3)
//                .shuffleGrouping("bolt-distributor", "stream-number-saver");
//        builder.setBolt("bolt-string-saver", new SaveDataBolt(Type.STRING), 3)
//                .shuffleGrouping("bolt-distributor", "stream-string-saver");
//        builder.setBolt("bolt-sign-saver", new SaveDataBolt(Type.SIGN), 3)
//                .shuffleGrouping("bolt-distributor", "stream-sign-saver");

        // submit topology
        Config conf = new Config();
        conf.setMaxTaskParallelism(10000);

        // Remote submission is disabled in this demo; it would look like this:
//        if (args != null && args.length > 0) {
//            String nimbus = args[0];
//            conf.put(Config.NIMBUS_HOST, nimbus);
//            conf.setNumWorkers(3);
//            StormSubmitter.submitTopologyWithProgressBar(TOPOLOGY_NAME, conf, builder.createTopology());
//        }

        LocalCluster cluster = new LocalCluster();
        try {
            cluster.submitTopology(TOPOLOGY_NAME, conf, builder.createTopology());
            Thread.sleep(RUN_MILLIS);
        } finally {
            // Always stop the in-process cluster, even if submission fails or the wait is interrupted.
            cluster.shutdown();
        }
    }
}
/**
 * String tags identifying the kind of record a tuple carries.
 *
 * <p>{@code DistributeWordByTypeBolt} compares a tuple's first field against these values to
 * choose the output stream.
 */
interface Type {

    /** Tag routed to the {@code "stream-number-saver"} stream. */
    public static final String NUMBER = "NUMBER";

    /** Tag routed to the {@code "stream-string-saver"} stream. */
    public static final String STRING = "STRING";

    /** Tag routed to the {@code "stream-sign-saver"} stream. */
    public static final String SIGN = "SIGN";
}
/**
 * Spout that repeatedly emits one randomly chosen text line, tagged with a fixed type value.
 *
 * <p>Each emitted tuple has the fields {@code ("type", "record")}.
 */
public class ProduceRecordSpout extends BaseRichSpout {

    private static final long serialVersionUID = 1L;

    /** Value emitted as the "type" field of every tuple. */
    private final String type;
    /** Candidate lines; one is picked at random for each emitted tuple. */
    private final String[] recordLines;

    private SpoutOutputCollector collector;
    private Random rand;

    /**
     * @param type  value to emit as the "type" field
     * @param lines text lines to choose from when emitting the "record" field
     */
    public ProduceRecordSpout(String type, String[] lines) {
        this.recordLines = lines;
        this.type = type;
    }

    /** Stores the collector and creates the random generator used to pick lines. */
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.rand = new Random();
        this.collector = collector;
    }

    /** Waits 500 ms, then emits {@code (type, randomly chosen line)}. */
    public void nextTuple() {
        Utils.sleep(500);
        int pick = rand.nextInt(recordLines.length);
        List<Object> tuple = new Values(type, recordLines[pick]);
        // The value list is passed a second time as the message id of the emitted tuple.
        collector.emit(tuple, tuple);
    }

    /** Declares the output fields {@code "type"} and {@code "record"}. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        Fields outputFields = new Fields("type", "record");
        declarer.declare(outputFields);
    }
}
/**
 * Bolt that splits the text of each incoming {@code (type, record)} tuple on whitespace and emits
 * one {@code (type, word)} tuple per word, anchored to the input tuple.
 */
public  class SplitRecordBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;
    private OutputCollector collector;

    /** Stores the collector used to emit and ack tuples. */
    public void prepare(Map stormConf, TopologyContext context,
                        OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Emits one tuple per whitespace-separated word of the input line, then acks the input.
     *
     * @param input tuple whose field 0 is the type and field 1 is the text line
     */
    public void execute(Tuple input) {
        String type = input.getString(0);
        String line = input.getString(1);
        if (line != null) {
            // Split the trimmed text so leading whitespace does not produce an empty first word.
            String trimmed = line.trim();
            if (!trimmed.isEmpty()) {
                for (String word : trimmed.split("\\s+")) {
                    collector.emit(input, new Values(type, word));
                }
            }
        }
        // Ack exactly once per input tuple, after all words were emitted. This also covers
        // null/blank lines, which would otherwise never be acked.
        collector.ack(input);
    }

    /** Declares the output fields {@code "type"} and {@code "word"}. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("type", "word"));
    }
}
/**
 * Bolt that forwards each {@code (type, word)} tuple to a named output stream selected by its
 * type, and prints the id of the task that handled it.
 */
public class DistributeWordByTypeBolt extends BaseRichBolt {

    private static final long serialVersionUID = 1L;
    private OutputCollector collector;
    private int taskId;

    /** Stores the collector, prints this component's sources and remembers this task's id. */
    @Override
    public void prepare(Map stormConf, TopologyContext context,
                        OutputCollector collector) {
        this.collector = collector;
        System.out.println("sources==> " + context.getThisSources());
        this.taskId = context.getThisTaskId();
    }

    /**
     * Re-emits the tuple on the stream matching its type, logs the handling task, then acks.
     *
     * @param input tuple whose field 0 is the type and field 1 is the word
     */
    @Override
    public void execute(Tuple input) {
        String type = input.getString(0);
        String word = input.getString(1);

        String targetStream = streamFor(type);
        collector.emit(targetStream, input, new Values(type, word));
        System.out.println("taskID:" + this.taskId + " type=" + type + ", word=" + word);

        // ack tuple
        collector.ack(input);
    }

    /**
     * Maps a type tag to the id of the stream its tuples are emitted on.
     * Types other than the three {@code Type} constants map to {@code "stream-discarder"};
     * a bolt can subscribe to that stream if such records should be handled.
     */
    private static String streamFor(String type) {
        switch (type) {
            case Type.NUMBER:
                return "stream-number-saver";
            case Type.STRING:
                return "stream-string-saver";
            case Type.SIGN:
                return "stream-sign-saver";
            default:
                return "stream-discarder";
        }
    }

    /** Declares the four named output streams, each with fields {@code "type"} and {@code "word"}. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        String[] streamIds = {
            "stream-number-saver",
            "stream-string-saver",
            "stream-sign-saver",
            "stream-discarder"
        };
        for (String streamId : streamIds) {
            declarer.declareStream(streamId, new Fields("type", "word"));
        }
    }
}

运行结果

taskID:2 type=SIGN, word=***
taskID:2 type=SIGN, word=@@
taskID:2 type=STRING, word=hello
taskID:2 type=STRING, word=the
taskID:2 type=STRING, word=word
taskID:2 type=SIGN, word=++
taskID:2 type=SIGN, word=-*%
taskID:2 type=SIGN, word=***
taskID:3 type=NUMBER, word=80966
taskID:3 type=NUMBER, word=31

解决

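最后的解决方法是修改setSpout:不再使用Type.NUMBER、Type.STRING、Type.SIGN这些常量,而是直接指定小写字符串"number"、"string"、"sign"。(当时并不清楚具体原因,有知道的大神请告知,万分感谢。)补充说明:fieldsGrouping只保证分组字段值相同的tuple进入同一个task,并不保证不同的值进入不同的task——目标task一般由分组字段值的哈希对task数取模得到。只有3种取值时很容易发生碰撞,于是两种type落到了同一个task上;换一组字符串只是改变了哈希值,恰好让三者分散开,并不是对任意取值都有效的通用办法。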

 // Fix described in the article: pass plain lower-case string literals as the spout type
 // instead of the Type.* constants.
 builder.setSpout("spout-number", new ProduceRecordSpout("number", new String[] {"111 222 333", "80966 31"}), 1);
        builder.setSpout("spout-string", new ProduceRecordSpout("string", new String[]{"abc ddd fasko", "hello the word"}), 1);
        builder.setSpout("spout-sign", new ProduceRecordSpout("sign", new String[]{"++ -*% *** @@", "{+-} ^#######"}), 1);

运行结果

taskID:4 type=sign, word=***
taskID:3 type=number, word=31
taskID:4 type=sign, word=@@
taskID:2 type=string, word=hello
taskID:2 type=string, word=the
taskID:2 type=string, word=word
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值