partitionAggregate
partitionAggregate
会在一批 tuple 的每个分区上执行一个指定的功能操作。以下面这段代码为例:
mystream.partitionAggregate(new Fields("b"), new Sum(), new Fields("sum"))
假如输入流中包含有 “a”、“b” 两个域并且有以下几个 tuple 块:
Partition 0:
["a", 1]
["b", 2]
Partition 1:
["a", 3]
["c", 8]
Partition 2:
["e", 1]
["d", 9]
["d", 10]
经过上面的代码之后,输出就会变成带有一个名为 “sum” 的域的数据流,其中的 tuple 就是这样的:
Partition 0:
[3]
Partition 1:
[11]
Partition 2:
[20]
Topology
首先需要对数据流进行分区:使用 partitionBy 方法按照指定的 field 进行分区。
package storm.topology;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import storm.spout.FixedBatchSpout;
import storm.trident.Split;
import storm.trident.Statistics;
import storm.trident.WordAggregat;
/**
 * Local demo of Trident's {@code partitionAggregate}.
 *
 * <p>A fixed set of sentences is split into words, the stream is repartitioned by
 * the {@code word} field, and every partition is then aggregated by
 * {@code WordAggregat}, whose result is emitted in the {@code agg} field.
 */
public class TridentAggreTopology {

    /**
     * Builds the topology and submits it to an in-process {@code LocalCluster}
     * under the name {@code soc} with debug logging enabled.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        StormTopology wordTopology = buildTopology();

        LocalCluster localCluster = new LocalCluster();
        Config settings = new Config();
        settings.setDebug(true);
        localCluster.submitTopology("soc", settings, wordTopology);
    }

    /** Wires spout -> split -> partitionBy(word) -> partitionAggregate and builds the topology. */
    private static StormTopology buildTopology() {
        TridentTopology trident = new TridentTopology();
        trident.newStream("batch-spout", sentenceSpout())
                // Split each sentence into individual words.
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                // Route tuples by word before the per-partition aggregation runs.
                .partitionBy(new Fields("word"))
                .partitionAggregate(new Fields("word"), new WordAggregat(), new Fields("agg"));
        return trident.build();
    }

    /**
     * Creates the spout that supplies the three demo sentences.
     *
     * <p>NOTE(review): the second constructor argument (1) is presumably the maximum
     * batch size of the project-local {@code FixedBatchSpout} - confirm. Cycling
     * ({@code setCycle(true)}) is intentionally left disabled, as in the original demo.
     */
    private static FixedBatchSpout sentenceSpout() {
        return new FixedBatchSpout(
                new Fields("sentence"),
                1,
                new Values("the cow jumped "),
                new Values("cow jumped"),
                new Values("jumped"));
    }
}
WordAggregat
package storm.trident;
import org.apache.storm.trident.operation.BaseAggregator;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values;
import java.util.HashMap;
import java.util.Map;
/**
 * Trident aggregator that counts how often each word occurs among the tuples it is
 * given for one batch partition.
 *
 * <p>The running counts live in the state object returned by {@link #init}, which
 * Trident hands back to {@link #aggregate} and {@link #complete}. Keeping the counts
 * there (instead of in a static field) means every batch starts from an empty map and
 * concurrently running aggregator instances never share mutable state.
 */
public class WordAggregat extends BaseAggregator<Map<String, Integer>> {

    /**
     * Legacy shared map, retained only so that existing references to
     * {@code WordAggregat.map} still compile.
     *
     * @deprecated this aggregator no longer reads or writes the field; counts are kept
     *     in the per-batch state and emitted by {@link #complete}.
     */
    @Deprecated
    public static Map<String, Integer> map = new HashMap<String, Integer>();

    /**
     * Creates the empty word-to-count map used as the aggregation state.
     *
     * @param batchId identifier of the batch being processed; not used
     * @param collector collector for emitting tuples; not used here
     * @return a new, empty, mutable map
     */
    @Override
    public Map<String, Integer> init(Object batchId, TridentCollector collector) {
        return new HashMap<String, Integer>();
    }

    /**
     * Adds one occurrence of the word held in the first field of {@code tuple}.
     *
     * @param val the state created by {@link #init}; updated in place
     * @param tuple input tuple whose field 0 is the word
     * @param collector collector for emitting tuples; not used here
     */
    @Override
    public void aggregate(Map<String, Integer> val, TridentTuple tuple,
                          TridentCollector collector) {
        String word = tuple.getString(0);
        Integer seen = val.get(word);
        // The first occurrence counts as 1 (the previous code stored 0 here).
        val.put(word, seen == null ? 1 : seen + 1);
    }

    /**
     * Prints every word with its count and emits the whole count map as a single
     * one-field tuple.
     *
     * @param val the accumulated word counts
     * @param collector receives the resulting tuple
     */
    @Override
    public void complete(Map<String, Integer> val, TridentCollector collector) {
        for (Map.Entry<String, Integer> entry : val.entrySet()) {
            System.out.println("key= " + entry.getKey() + " and value= " + entry.getValue());
        }
        collector.emit(new Values(val));
    }
}
结果
我们预期的结果为聚合单词出现的次数