Trident Function的使用示例
自定义Function需要继承BaseFunction类
Function相当于storm topology中的bolt,只不过需要用each方法来连接spout与bolt、bolt和bolt
spout的创建方法有点不同于storm topology
例子01:将输入元组的前两个字段相加,输出原有各字段及相加的结果
package com.xnmzdx.storm.trident.example;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.testing.FixedBatchSpout;
import storm.trident.tuple.TridentTuple;
public class TridentFunction {
public static class SumFunction extends BaseFunction{//BaseFunction是函数
private static final long serialVersionUID = 1991768683305970824L;
public void execute(TridentTuple tuple, TridentCollector collector) {
System.out.println("传入进来的内容为:"+tuple);
//获取a,b两个域
int a = tuple.getInteger(0);
int b = tuple.getInteger(1);
int sum = a + b;
collector.emit(new Values(sum));
}
}
//继承BaseFunction类,重写execute方法
public static class Result extends BaseFunction{
public void execute(TridentTuple tuple, TridentCollector collector) {
//获取tuple输入的内容
System.out.println();
Integer a = tuple.getIntegerByField("a");
Integer b = tuple.getIntegerByField("b");
Integer c = tuple.getIntegerByField("c");
Integer d = tuple.getIntegerByField("d");
System.out.println("a:"+a+",b:"+b+",c:"+c+",d:"+d);
Integer sum = tuple.getIntegerByField("sum");
System.out.println("sum:"+sum);
}
}
public static StormTopology buildTopology() {
TridentTopology topology = new TridentTopology();
//设定数据源
FixedBatchSpout spout = new FixedBatchSpout(
new Fields("a","b","c","d"),//声明输入的域字段
4, //设置批处理大小
//设置数据源内容
//测试数据源
new Values(1,4,7,10),
new Values(1,1,3,11),
new Values(2,2,7,1),
new Values(1,5,7,2));
//指定是否循环
spout.setCycle(false);
//指定输入源spout
Stream inputStream = topology.newStream("spout", spout);
/**
* 要实现流spout - bolt的模式在trident里是使用each来做的
* each方法参数:
* 1.输入数据源参数名称:"a","b","c","d"
* 2.需要流转执行的function对象(也就是bolt对象):new SumFunction()
* 3.指定function对象里的输出参数名称:sum
*/
inputStream.each(new Fields("a","b","c","d"), new SumFunction(),new Fields("sum"))
/**
* 继续使用each调用下一个function(bolt)
* 第一个参数为:"a","b","c","d","sum"
* 第二个参数为:new Result() 也就是执行函数
* 第三个参数为没有输出
*/
.each(new Fields("a","b","c","d","sum"),new Result(),new Fields());
return topology.build();
}
public static void main(String[] args) throws InterruptedException, AlreadyAliveException, InvalidTopologyException {
Config conf = new Config();
//设置bach最大处理
conf.setNumWorkers(2);
conf.setMaxSpoutPending(20);
if(args.length==0) {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("trident-function", conf, buildTopology());
Thread.sleep(10000);
cluster.shutdown();
}else {
StormSubmitter.submitTopology(args[0],conf,buildTopology());
}
}
}
Trident Filter的使用示例
自定义Filter需要继承BaseFilter类
Filter也相当于storm topology中的bolt,也需要用each方法来连接,连接时each的参数与Function有些不同
例子02:过滤出前两个字段相加为偶数的数据
package com.xnmzdx.storm.trident.example;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.BaseFilter;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.testing.FixedBatchSpout;
import storm.trident.tuple.TridentTuple;
public class TrintFilter {
/**
*过滤出 前两个元素相加为偶数的 数据
* @author zyt
*
*/
public static class CheckFilter extends BaseFilter{
private static final long serialVersionUID = -6921355475400756903L;
public boolean isKeep(TridentTuple tuple) {
int a = tuple.getInteger(0);
int b = tuple.getInteger(1);
int sum = a + b;
if(sum % 2 == 0) {
return true;
}
return false;
}
}
//继承BaseFunction类,重写execute方法
public static class Result extends BaseFunction{
public void execute(TridentTuple tuple, TridentCollector collector) {
//获取tuple输入的内容
System.out.println();
Integer a = tuple.getIntegerByField("a");
Integer b = tuple.getIntegerByField("b");
Integer c = tuple.getIntegerByField("c");
Integer d = tuple.getIntegerByField("d");
System.out.println("a:"+a+",b:"+b+",c:"+c+",d:"+d);
}
}
/**
* 这是一个生成拓扑StormTopology的一个方法
* @return
*/
public static StormTopology buildTopology() {
TridentTopology topology = new TridentTopology();
//设定数据源
FixedBatchSpout spout = new FixedBatchSpout(
new Fields("a","b","c","d"),//声明输入的域字段
4, //设置批处理大写
//设置数据源内容
//测试数据源
new Values(1,4,7,10),
new Values(1,1,3,11),
new Values(2,2,7,1),
new Values(1,5,7,2));
//指定是否循环
spout.setCycle(false);
//指定输入源spout
Stream inputStream = topology.newStream("spout", spout);
/**
* 要实现流spout - bolt的模式在trident里是使用each来做的
* each方法参数:
* 1.输入数据源参数名称:subjects
* 2.需要流转执行的function对象(也就是bolt对象):new Split()
*/
inputStream.each(new Fields("a","b","c","d"), new CheckFilter())//CheckFilter为一个Filter,后面不需要多的参数了
//继续使用each调用一下个function(bolt)输入参数为subject和count,第二个参数为new Result()也就是执行函数,第三个参数为没有输出
.each(new Fields("a","b","c","d"),new Result(),new Fields());
return topology.build();
}
public static void main(String[] args) throws InterruptedException, AlreadyAliveException, InvalidTopologyException {
Config conf = new Config();
//设置bach最大处理
conf.setNumWorkers(2);
conf.setMaxSpoutPending(20);
if(args.length==0) {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("trident-function", conf, buildTopology());
Thread.sleep(10000);
cluster.shutdown();
}else {
StormSubmitter.submitTopology(args[0],conf,buildTopology());
}
}
}
体会几种分组策略(随机分组:shuffle;分区分组:partitionBy;全局分组:global;广播分组:broadcast)的不同
例子03:将单词写入文件中
package com.xnmzdx.storm.trident.strategy;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
/**
* 此Function的功能为将单词写入文件中
* @author zyt
*
*/
public class WriteFunction extends BaseFunction {
private static final long serialVersionUID = -8101457620853073558L;
private FileWriter writer;
private static final Log log = LogFactory.getLog(WriteFunction.class);
public void execute(TridentTuple tuple, TridentCollector collector) {
String text = tuple.getStringByField("sub");
try {
if(writer == null) {
// writer = new FileWriter("writer.txt");
if(writer == null){
if(System.getProperty("os.name").equals("Windows 10")){
writer = new FileWriter("D:\\stormtest\\" + this);//产生的文件名是com.xnmzdx.storm.trident.strategy.WriteFunction@7391b99d
} else if(System.getProperty("os.name").equals("Windows 8.1")){
writer = new FileWriter("D:\\stormtest\\" + this);
} else if(System.getProperty("os.name").equals("Windows 7")){
writer = new FileWriter("D:\\stormtest\\" + this);
} else if(System.getProperty("os.name").equals("Linux")){
System.out.println("----:" + System.getProperty("os.name"));
writer = new FileWriter("/usr/local/temp/" + this);
}
}
log.info("【write】: 写入文件");
writer.write(text);
writer.write("\n");
writer.flush();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
package com.xnmzdx.storm.trident.strategy;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.testing.FixedBatchSpout;
public class StrategyTopology {

	/**
	 * Builds a topology that feeds a cycling word spout into WriteFunction
	 * using the global() grouping (every tuple goes to one task). The
	 * commented-out calls show the other strategies to experiment with.
	 *
	 * @return the assembled StormTopology, ready to submit
	 */
	public static StormTopology buildTopology() {
		TridentTopology topology = new TridentTopology();
		// Single-field ("sub") spout emitting one word per batch.
		FixedBatchSpout wordSpout = new FixedBatchSpout(
				new Fields("sub"),
				1,
				new Values("java"),
				new Values("python"),
				new Values("php"),
				new Values("c++"),
				new Values("ruby"));
		// Cycle forever so grouping behaviour can be observed over time.
		wordSpout.setCycle(true);
		Stream words = topology.newStream("spout", wordSpout);
		// Alternative grouping strategies to try in place of global():
		//   .shuffle()                       random grouping
		//   .partitionBy(new Fields("sub"))  partition-by-field grouping
		//   .broadcast()                     broadcast grouping
		words.global()
				// WriteFunction declares no output fields; run with 4 tasks.
				.each(new Fields("sub"), new WriteFunction(), new Fields())
				.parallelismHint(4);
		return topology.build();
	}

	public static void main(String[] args) throws InterruptedException, AlreadyAliveException, InvalidTopologyException {
		Config conf = new Config();
		conf.setNumWorkers(2);
		// Cap the number of un-acked batches in flight.
		conf.setMaxSpoutPending(20);
		if (args.length == 0) {
			// Local mode: run for 20 seconds, then shut down.
			LocalCluster local = new LocalCluster();
			local.submitTopology("trident-function", conf, buildTopology());
			Thread.sleep(20000);
			local.shutdown();
		} else {
			// First arg is the topology name on the real cluster.
			StormSubmitter.submitTopology(args[0], conf, buildTopology());
		}
	}
}
例子04:单词个数统计
package com.xnmzdx.storm.trident.wordcount;
import backtype.storm.tuple.Values;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
public class SplitFunction extends BaseFunction {
private static final long serialVersionUID = 8522249543742585261L;
public void execute(TridentTuple tuple, TridentCollector collector) {
String subjects = tuple.getStringByField("subjects");
//获取tuple输入内容
//逻辑处理,然后发射给下一个组件
for(String sub : subjects.split(" ")) {
collector.emit(new Values(sub));
}
}
}
package com.xnmzdx.storm.trident.wordcount;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
public class ResultFunction extends BaseFunction {
private static final long serialVersionUID = -3904602976630760587L;
public void execute(TridentTuple tuple, TridentCollector collector) {
String sub = tuple.getStringByField("sub");
Long count = tuple.getLongByField("count");
System.out.println(sub + ":" + count);
}
}
package com.xnmzdx.storm.trident.wordcount;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;
import storm.trident.testing.FixedBatchSpout;
public class WordCountTopology {

	/**
	 * Builds the word-count topology:
	 * spout -> SplitFunction -> groupBy("sub") -> Count -> ResultFunction.
	 *
	 * @return the assembled StormTopology, ready to submit
	 */
	public static StormTopology buildTopology() {
		TridentTopology topology = new TridentTopology();
		// Fixed test data: one "subjects" sentence per tuple, batch size 4.
		FixedBatchSpout sentenceSpout = new FixedBatchSpout(
				new Fields("subjects"),
				4,
				new Values("java java php ruby c++"),
				new Values("java python python python c++"),
				new Values("java java java java ruby"),
				new Values("c++ java ruby php java"));
		// Emit the test batches once only; do not cycle.
		sentenceSpout.setCycle(false);
		Stream sentences = topology.newStream("spout", sentenceSpout);
		sentences.shuffle()
				// Split each sentence into words, emitted as field "sub".
				.each(new Fields("subjects"), new SplitFunction(), new Fields("sub"))
				// Group by word — the Trident analogue of fields grouping.
				.groupBy(new Fields("sub"))
				// Aggregate each group with Count; output field is "count".
				.aggregate(new Count(), new Fields("count"))
				// Print word/count pairs; nothing is emitted further.
				.each(new Fields("sub", "count"), new ResultFunction(), new Fields())
				.parallelismHint(1);
		return topology.build();
	}

	public static void main(String[] args) throws InterruptedException, AlreadyAliveException, InvalidTopologyException {
		Config conf = new Config();
		conf.setNumWorkers(2);
		// Cap the number of un-acked batches in flight.
		conf.setMaxSpoutPending(20);
		if (args.length == 0) {
			// Local mode: run for 10 seconds, then shut down.
			LocalCluster local = new LocalCluster();
			local.submitTopology("trident-function", conf, buildTopology());
			Thread.sleep(10000);
			local.shutdown();
		} else {
			// First arg is the topology name on the real cluster.
			StormSubmitter.submitTopology(args[0], conf, buildTopology());
		}
	}
}