/*
 * Trident practice notes.
 * Demo exercise: newStream creates a new stream; each applies a function; filters;
 * repartitioning operations (shuffle etc.); partitionAggregate performs per-partition aggregation.
 */
package com.neusoft.soc.topology;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseAggregator;
import org.apache.storm.trident.operation.BaseFilter;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.operation.TridentOperationContext;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import scala.Function2;
import scala.collection.parallel.IterableSplitter;
import scala.collection.parallel.ParIterableLike;
import scala.collection.parallel.ParIterableLike.Aggregate;
public class TopologyTrident {
/**
 * Builds and locally runs a Trident demo topology:
 * spout -&gt; split sentences into words -&gt; partitionBy word -&gt; filter -&gt; shuffle
 * -&gt; filter -&gt; per-partition aggregation into "aggr1".
 *
 * @param args unused command-line arguments
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) {
    // Fixed test spout: max batch size 3, emits the sentences once (no cycling).
    FixedBatchSpout spout = new FixedBatchSpout(
            new Fields("sentence"), 3,
            new Values("the cow jumped over the moon"),
            new Values("how many apples can you eat"));
    spout.setCycle(false);

    TridentTopology topology = new TridentTopology();
    topology.newStream("spout", spout)
            .each(new Fields("sentence"), new Split(), new Fields("word"))
            .parallelismHint(2)
            .partitionBy(new Fields("word")) // hash-partition by word; alternative: .shuffle()
            .each(new Fields("word"), new Filter1())
            .parallelismHint(5)
            .shuffle()
            .each(new Fields("word"), new Filter2())
            .parallelismHint(2)
            .partitionAggregate(new Fields("word"), new Agg1(), new Fields("aggr1"));

    StormTopology stormTopology = topology.build();
    LocalCluster cluster = new LocalCluster();
    Config conf = new Config();
    conf.setDebug(false);
    cluster.submitTopology("test", conf, stormTopology);

    // BUG FIX: the original returned immediately, leaving the local cluster
    // running with no shutdown. Give the fixed batches time to be processed,
    // then tear the cluster down cleanly.
    try {
        Thread.sleep(10_000);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // preserve interrupt status
    }
    cluster.killTopology("test");
    cluster.shutdown();
}
/**
 * Per-partition, per-batch aggregator that concatenates all words seen in the
 * partition into one space-separated string and emits it as "aggr1".
 *
 * <p>BUG FIX: the original accumulated into an instance field that was never
 * reset between batches, and {@code complete} emitted the state object returned
 * by {@code init} — which was always {@code null}. Per the Trident aggregator
 * contract, batch state is now carried in the {@link StringBuilder} created by
 * {@link #init} for each batch. The unused {@code state} map and {@code batchId}
 * fields were dead code and have been removed.
 */
@SuppressWarnings("rawtypes")
public static class Agg1 extends BaseAggregator<StringBuilder> {
    /** Kept for backward compatibility; mirrors the most recently completed batch. */
    public String testStr = " ";
    /** Index of the partition this aggregator instance runs in. */
    private int partitionId;
    /** Total number of partitions executing this aggregator. */
    private int numPartitions;

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map conf, TridentOperationContext context) {
        partitionId = context.getPartitionIndex();
        numPartitions = context.numPartitions();
    }

    /** Creates a fresh accumulator for each batch (one per batch/partition). */
    @Override
    public StringBuilder init(Object batchId, TridentCollector collector) {
        return new StringBuilder();
    }

    /** Appends the tuple's word (field 0) to this batch's accumulator. */
    @Override
    public void aggregate(StringBuilder state, TridentTuple tuple, TridentCollector collector) {
        if (state.length() > 0) {
            state.append(' ');
        }
        state.append(tuple.getString(0));
    }

    /** Emits the concatenated words accumulated for this batch/partition. */
    @Override
    public void complete(StringBuilder state, TridentCollector collector) {
        testStr = state.toString();
        collector.emit(new Values(testStr));
    }
}
/**
 * Pass-through filter used to observe the partitionBy stage; keeps every tuple.
 * Records the partition index this instance was assigned during prepare.
 */
public static class Filter1 extends BaseFilter {
    public int partitionIndex = 0;

    @Override
    public void prepare(Map conf, TridentOperationContext context) {
        partitionIndex = context.getPartitionIndex();
        super.prepare(conf, context);
    }

    /** Always keeps the tuple — this filter never drops anything. */
    @Override
    public boolean isKeep(TridentTuple tuple) {
        return true;
    }
}
/**
 * Pass-through filter used to observe the shuffle stage.
 * Stores its partition index for debugging; never filters out any tuple.
 */
public static class Filter2 extends BaseFilter {
    public int partitionIndex = 0;

    @Override
    public void prepare(Map conf, TridentOperationContext context) {
        this.partitionIndex = context.getPartitionIndex();
        super.prepare(conf, context);
    }

    /** Unconditionally retains every tuple. */
    @Override
    public boolean isKeep(TridentTuple input) {
        return true;
    }
}
public static class Split extends BaseFunction {
public void execute(TridentTuple tuple, TridentCollector collector) {
String sentence = tuple.getString(0);
for(String word: sentence.split(" ")) {
collector.emit(new Values(word));
}
}
}
}