FieldsGroup的并行度没有起作用
问题描述
设置将要进行fieldsGrouping的bolt的并行度为3,3个task确实都创建了,但实际上接收到数据的task只有2个。下面通过样例来重现一下问题。
代码中有一个ProduceRecordSpout用来发射字符文本(“type”,”line”),SplitRecordBolt将ProduceRecordSpout发送的文本按空格进行切割,向下发送(“type”,”word”),DistributeWordByTypeBolt接收ProduceRecordSpout发送的type和word,并打印。
如代码所示,初始化3个spout,他们有不同的type,根据type进行fieldsGrouping分发给DistributeWordByTypeBolt的不同task,DistributeWordByTypeBolt中我们测试接收到的不同type的taskid。发现taskid只有2种,也就是有一个task接收了两种type,并行度3(3个task)没有起作用,测试结果和代码如下:
完整代码
public class Main {
    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, InterruptedException {
        // Build the topology: 3 spouts -> splitter bolt -> distributor bolt.
        TopologyBuilder builder = new TopologyBuilder();
        // Three spouts, each emitting (type, line) records of one fixed type.
        builder.setSpout("spout-number", new ProduceRecordSpout("number", new String[] {"111 222 333", "80966 31"}), 1);
        builder.setSpout("spout-string", new ProduceRecordSpout("string", new String[] {"abc ddd fasko", "hello the word"}), 1);
        builder.setSpout("spout-sign", new ProduceRecordSpout("sign", new String[] {"++ -*% *** @@", "{+-} ^#######"}), 1);
        // Splitter bolt subscribes to all three spouts via shuffle grouping.
        builder.setBolt("bolt-splitter", new SplitRecordBolt(), 2)
                .shuffleGrouping("spout-number")
                .shuffleGrouping("spout-string")
                .shuffleGrouping("spout-sign");
        // Fields grouping on "type": tuples carrying the same type value are
        // routed to the same distributor task.
        builder.setBolt("bolt-distributor", new DistributeWordByTypeBolt(), 3)
                .fieldsGrouping("bolt-splitter", new Fields("type"));
        // Optional saver bolts, one per declared output stream of the distributor:
        // builder.setBolt("bolt-number-saver", new SaveDataBolt(Type.NUMBER), 3)
        //         .shuffleGrouping("bolt-distributor", "stream-number-saver");
        // builder.setBolt("bolt-string-saver", new SaveDataBolt(Type.STRING), 3)
        //         .shuffleGrouping("bolt-distributor", "stream-string-saver");
        // builder.setBolt("bolt-sign-saver", new SaveDataBolt(Type.SIGN), 3)
        //         .shuffleGrouping("bolt-distributor", "stream-sign-saver");
        Config conf = new Config();
        conf.setMaxTaskParallelism(10000);
        String name = "MultiStreamsWordDistributionTopology";
        // To submit to a remote cluster instead, read the nimbus host from
        // args[0], set Config.NIMBUS_HOST and the worker count, and call
        // StormSubmitter.submitTopologyWithProgressBar(name, conf, topology).
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology(name, conf, builder.createTopology());
        Thread.sleep(60 * 60 * 1000); // let the local topology run for an hour
        cluster.shutdown();
    }
}
/**
 * Record-type name constants shared by the topology's bolts.
 *
 * <p>Implemented as a final, non-instantiable class rather than the original
 * "constant interface" (an anti-pattern: interfaces define types, they should
 * not exist merely to hold constants). Callers access these statically as
 * {@code Type.NUMBER} etc., so this change is source-compatible for them.
 */
final class Type {
    public static final String NUMBER = "NUMBER";
    public static final String STRING = "STRING";
    public static final String SIGN = "SIGN";

    private Type() {
        // non-instantiable constant holder
    }
}
public class ProduceRecordSpout extends BaseRichSpout {
    private static final long serialVersionUID = 1L;
    private SpoutOutputCollector collector;
    private Random random;
    private String[] recordLines; // pool of text lines to choose from
    private String type;          // value of the "type" field on every emitted tuple

    /**
     * @param type  type tag attached as the first field of every tuple
     * @param lines candidate text lines; one is picked at random per emit
     */
    public ProduceRecordSpout(String type, String[] lines) {
        this.type = type;
        this.recordLines = lines;
    }

    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        this.collector = collector;
        this.random = new Random();
    }

    public void nextTuple() {
        // Throttle emission so the local-cluster output stays readable.
        Utils.sleep(500);
        int pick = random.nextInt(recordLines.length);
        List<Object> tuple = new Values(type, recordLines[pick]);
        // The values list doubles as the message id for tuple anchoring.
        collector.emit(tuple, tuple);
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("type", "record"));
    }
}
public class SplitRecordBolt extends BaseRichBolt {
private static final long serialVersionUID = 1L;
//private static final Log LOG = LogFactory.getLog(SplitRecordBolt.class);
private OutputCollector collector;
private int taskId;
//@Override
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
this.collector = collector;
this.taskId=context.getThisTaskId();
}
//@Override
public void execute(Tuple input) {
String type = input.getString(0);
String line = input.getString(1);
// System.out.println("id:"+taskId+" Type="+type+",line="+line);
if(line != null && !line.trim().isEmpty()) {
for(String word : line.split("\\s+")) {
collector.emit(input, new Values(type, word));
//LOG.info("Word emitted: type=" + type + ", word=" + word);
//System.out.println("Word emitted: type=" + type + ", word=" + word);
//ack tuple
collector.ack(input);
}
}
}
//@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("type", "word"));
}
}
public class DistributeWordByTypeBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;
    private OutputCollector collector;
    private int taskId;

    @Override
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
        Map<GlobalStreamId, Grouping> sources = context.getThisSources();
        System.out.println("sources==> " + sources);
        this.taskId = context.getThisTaskId();
    }

    /**
     * Routes each (type, word) tuple to the output stream matching its type.
     *
     * <p>The comparison is case-insensitive: the spouts in {@code Main} are
     * configured with lowercase type tags ("number", "string", "sign") while
     * the {@code Type} constants are uppercase, so the original exact-match
     * {@code switch} silently sent every tuple to "stream-discarder".
     * Calling {@code equalsIgnoreCase} on the constant is also null-safe,
     * whereas {@code switch (type)} would throw an NPE on a null field.
     */
    @Override
    public void execute(Tuple input) {
        String type = input.getString(0);
        String word = input.getString(1);
        if (Type.NUMBER.equalsIgnoreCase(type)) {
            emit("stream-number-saver", type, input, word);
        } else if (Type.STRING.equalsIgnoreCase(type)) {
            emit("stream-string-saver", type, input, word);
        } else if (Type.SIGN.equalsIgnoreCase(type)) {
            emit("stream-sign-saver", type, input, word);
        } else {
            // Unknown type: the record is routed to a discard stream; a bolt
            // can subscribe to "stream-discarder" if these should be kept.
            emit("stream-discarder", type, input, word);
        }
        // Ack exactly once per input tuple.
        collector.ack(input);
    }

    // Emits the (type, word) pair on the given stream, anchored to the input.
    private void emit(String streamId, String type, Tuple input, String word) {
        collector.emit(streamId, input, new Values(type, word));
        System.out.println("taskID:"+this.taskId+" type=" + type + ", word=" + word);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream("stream-number-saver", new Fields("type", "word"));
        declarer.declareStream("stream-string-saver", new Fields("type", "word"));
        declarer.declareStream("stream-sign-saver", new Fields("type", "word"));
        declarer.declareStream("stream-discarder", new Fields("type", "word"));
    }
}
运行结果
taskID:2 type=SIGN, word=***
taskID:2 type=SIGN, word=@@
taskID:2 type=STRING, word=hello
taskID:2 type=STRING, word=the
taskID:2 type=STRING, word=word
taskID:2 type=SIGN, word=++
taskID:2 type=SIGN, word=-*%
taskID:2 type=SIGN, word=***
taskID:3 type=NUMBER, word=80966
taskID:3 type=NUMBER, word=31
解决
最后的解决方法是将setSpout进行修改,不再使用Type中的常量,而是直接指定小写字符串。原因在于:fieldsGrouping是按分组字段值的hash对目标task数取模来选择task的;分组字段只有3个不同取值时,其中两个值的hash对3取模可能恰好相同,于是被路由到同一个task。换成小写字符串后,各值的hash取模结果恰好互不冲突,3个task就都被用到了。也就是说并行度本身一直是生效的(3个task都存在),只是hash碰撞导致其中一个task始终收不到数据。
builder.setSpout("spout-number", new ProduceRecordSpout("number", new String[] {"111 222 333", "80966 31"}), 1);
builder.setSpout("spout-string", new ProduceRecordSpout("string", new String[]{"abc ddd fasko", "hello the word"}), 1);
builder.setSpout("spout-sign", new ProduceRecordSpout("sign", new String[]{"++ -*% *** @@", "{+-} ^#######"}), 1);
运行结果
taskID:4 type=sign, word=***
taskID:3 type=number, word=31
taskID:4 type=sign, word=@@
taskID:2 type=string, word=hello
taskID:2 type=string, word=the
taskID:2 type=string, word=word