Trident Function的使用示例
自定义Function需要继承BaseFunction类
Function相当于storm topology中的bolt,只不过需要用each方法来连接spout与bolt、bolt和bolt
spout的创建方法有点不同于storm topology
例子01:将输入元组的前两个字段相加,输出原有各字段及相加的结果
package com.xnmzdx.storm.trident.example;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.testing.FixedBatchSpout;
import storm.trident.tuple.TridentTuple;
public class TridentFunction {
public static class SumFunction extends BaseFunction{//BaseFunction是函数
private static final long serialVersionUID = 1991768683305970824L;
public void execute(TridentTuple tuple, TridentCollector collector) {
System.out.println("传入进来的内容为:"+tuple);
//获取a,b两个域
int a = tuple.getInteger(0);
int b = tuple.getInteger(1);
int sum = a + b;
collector.emit(new Values(sum));
}
}
//继承BaseFunction类,重写execute方法
public static class Result extends BaseFunction{
public void execute(TridentTuple tuple, TridentCollector collector) {
//获取tuple输入的内容
System.out.println();
Integer a = tuple.getIntegerByField("a");
Integer b = tuple.getIntegerByField("b");
Integer c = tuple.getIntegerByField("c");
Integer d = tuple.getIntegerByField("d");
System.out.println("a:"+a+",b:"+b+",c:"+c+",d:"+d);
Integer sum = tuple.getIntegerByField("sum");
System.out.println("sum:"+sum);
}
}
public static StormTopology buildTopology() {
TridentTopology topology = new TridentTopology();
//设定数据源
FixedBatchSpout spout = new FixedBatchSpout(
new Fields("a","b","c","d"),//声明输入的域字段
4, //设置批处理大小
//设置数据源内容
//测试数据源
new Values(1,4,7,10),
new Values(1,1,3,11),
new Values(2,2,7,1),
new Values(1,5,7,2));
//指定是否循环
spout.setCycle(false);
//指定输入源spout
Stream inputStream = topology.newStream("spout", spout);
/**
* 要实现流spout - bolt的模式在trident里是使用each来做的
* each方法参数:
* 1.输入数据源参数名称:"a","b","c","d"
* 2.需要流转执行的function对象(也就是bolt对象):new SumFunction()
* 3.指定function对象里的输出参数名称:sum
*/
inputStream.each(new Fields("a","b","c","d"), new SumFunction(),new Fields("sum"))
/**
* 继续使用each调用下一个function(bolt)
* 第一个参数为:"a","b","c","d","sum"
* 第二个参数为:new Result() 也就是执行函数
* 第三个参数为没有输出
*/
.each(new Fields("a","b","c","d","sum"),new Result(),new Fields());
return topology.build();
}
public static void main(String[] args) throws InterruptedException, AlreadyAliveException, InvalidTopologyException {
Config conf = new Config();
//设置bach最大处理
conf.setNumWorkers(2);
conf.setMaxSpoutPending(20);
if(args.length==0) {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("trident-function", conf, buildTopology());
Thread.sleep(10000);
cluster.shutdown();
}else {
StormSubmitter.submitTopology(args[0],conf,buildTopology());
}
}
}
Trident Filter的使用示例
自定义Filter需要继承BaseFilter类
Filter也相当于storm topology中的bolt,也需要用each方法来连接,连接时each的参数与Function有些不同
例子02:过滤出前两个字段相加为偶数的数据
package com.xnmzdx.storm.trident.example;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.BaseFilter;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.testing.FixedBatchSpout;
import storm.trident.tuple.TridentTuple;
public class TrintFilter {
/**
*过滤出 前两个元素相加为偶数的 数据
* @author zyt
*
*/
public static class CheckFilter extends BaseFilter{
private static final long serialVersionUID = -6921355475400756903L;
public boolean isKeep(TridentTuple tuple) {
int a = tuple.getInteger(0);
int b = tuple.getInteger(1);
int sum = a + b;
if(sum % 2 == 0) {
return true;
}
return false;
}
}
//继承BaseFunction类,重写execute方法
public static class Result extends BaseFunction{
public void execute(TridentTuple tuple, TridentCollector collector) {
//获取tuple输入的内容
System.out.println();
Integer a = tuple.getIntegerByField("a");
Integer b = tuple.getIntegerByField("b");
Integer c = tuple.getIntegerByField("c");
Integer d = tuple.getIntegerByField("d");
System.out.println("a:"+a+",b:"+b+",c:"+c+",d:"+d);
}
}
/**
* 这是一个生成拓扑StormTopology的一个方法
* @return
*/
public static StormTopology buildTopology() {
TridentTopology topology = new TridentTopology();
//设定数据源
FixedBatchSpout spout = new FixedBatchSpout(
new Fields("a","b","c","d"),//声明输入的域字段
4, //设置批处理大写
//设置数据源内容
//测试数据源
new Values(1,4,7,10),
new Values(1,1,3,11),
new Values(2,2,7,1),
new Values(1,5,7,2));
//指定是否循环
spout.setCycle(false);
//指定输入源spout
Stream inputStream = topology.newStream("spout", spout);
/**
* 要实现流spout - bolt的模式在trident里是使用each来做的
* each方法参数:
* 1.输入数据源参数名称:subjects
* 2.需要流转执行的function对象(也就是bolt对象):new Split()
*/
inputStream.each(new Fields("a","b","c","d"), new CheckFilter())//CheckFilter为一个Filter,后面不需要多的参数了
//继续使用each调用一下个function(bolt)输入参数为subject和count,第二个参数为new Result()也就是执行函数,第三个参数为没有输出
.each(new Fields("a","b","c","d"),new Result(),new Fields());
return topology.build();
}
public static void main(String[] args) throws InterruptedException, AlreadyAliveException, InvalidTopologyException {
Config conf = new Config();
//设置bach最大处理
conf.setNumWorkers(2);
conf.setMaxSpoutPending(20);
if(args.length==0) {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("trident-function", conf, buildTopology());
Thread.sleep(10000);
cluster.shutdown();
}else {
StormSubmitter.submitTopology(args[0],conf,buildTopology());
}
}
}
体会几种分组策略(随机分组:shuffle;分区分组:partitionBy;全局分组:global;广播分组:broadcast)的不同
例子03:将单词写入文件中
package com.xnmzdx.storm.trident.strategy;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
/**
* 此Function的功能为将单词写入文件中
* @author zyt
*
*/
public class WriteFunction extends BaseFunction {
private static final long serialVersionUID = -8101457620853073558L;
private FileWriter writer;
private static final Log log = LogFactory.getLog(WriteFunction.class);
public void execute(TridentTuple tuple, TridentCollector collector) {
String text = tuple.getStringByField("sub");
try {
if(writer == null) {
// writer = new FileWriter("writer.txt");
if(writer == null){
if(System.getProperty("os.name").equals("Windows 10")){
writer = new FileWriter("D:\\stormtest\\" + this);//产生的文件名是com.xnmzdx.storm.trident.strategy.WriteFunction@7391b99d
} else if(System.getProperty("os.name").equals("Windows 8.1")){
writer = new FileWriter("D:\\stormtest\\" + this);
} else if(System.getProperty("os.name").equals("Windows 7")){
writer = new FileWriter("D:\\stormtest\\" + this);
} else if(System.getProperty("os.name").equals("Linux")){
System.out.println("----:" + System.getProperty("os.name"));
writer = new FileWriter("/usr/local/temp/" + this);
}
}
log.info("【write】: 写入文件");
writer.write(text);
writer.write("\n");
writer.flush();
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
package com.xnmzdx.storm.trident.strategy;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.testing.FixedBatchSpout;
public class StrategyTopology {

	/**
	 * Builds a topology that feeds a cycling word spout into WriteFunction
	 * using the global() grouping (every tuple goes to one task). The
	 * commented-out calls show the other strategies to experiment with.
	 *
	 * @return the assembled StormTopology, ready to submit
	 */
	public static StormTopology buildTopology() {
		TridentTopology topology = new TridentTopology();
		// Single-field ("sub") spout emitting one word per batch.
		FixedBatchSpout wordSpout = new FixedBatchSpout(
				new Fields("sub"),
				1,
				new Values("java"),
				new Values("python"),
				new Values("php"),
				new Values("c++"),
				new Values("ruby"));
		// Cycle forever so grouping behaviour can be observed over time.
		wordSpout.setCycle(true);
		Stream words = topology.newStream("spout", wordSpout);
		// Alternative grouping strategies to try in place of global():
		//   .shuffle()                       random grouping
		//   .partitionBy(new Fields("sub"))  partition-by-field grouping
		//   .broadcast()                     broadcast grouping
		words.global()
				// WriteFunction declares no output fields; run with 4 tasks.
				.each(new Fields("sub"), new WriteFunction(), new Fields())
				.parallelismHint(4);
		return topology.build();
	}

	public static void main(String[] args) throws InterruptedException, AlreadyAliveException, InvalidTopologyException {
		Config conf = new Config();
		conf.setNumWorkers(2);
		// Cap the number of un-acked batches in flight.
		conf.setMaxSpoutPending(20);
		if (args.length == 0) {
			// Local mode: run for 20 seconds, then shut down.
			LocalCluster local = new LocalCluster();
			local.submitTopology("trident-function", conf, buildTopology());
			Thread.sleep(20000);
			local.shutdown();
		} else {
			// First arg is the topology name on the real cluster.
			StormSubmitter.submitTopology(args[0], conf, buildTopology());
		}
	}
}
例子04:单词个数统计
package com.xnmzdx.storm.trident.wordcount;
import backtype.storm.tuple.Values;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
public class SplitFunction extends BaseFunction {
private static final long serialVersionUID = 8522249543742585261L;
public void execute(TridentTuple tuple, TridentCollector collector) {
String subjects = tuple.getStringByField("subjects");
//获取tuple输入内容
//逻辑处理,然后发射给下一个组件
for(String sub : subjects.split(" ")) {
collector.emit(new Values(sub));
}
}
}
package com.xnmzdx.storm.trident.wordcount;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;
public class ResultFunction extends BaseFunction {
private static final long serialVersionUID = -3904602976630760587L;
public void execute(TridentTuple tuple, TridentCollector collector) {
String sub = tuple.getStringByField("sub");
Long count = tuple.getLongByField("count");
System.out.println(sub + ":" + count);
}
}
package com.xnmzdx.storm.trident.wordcount;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.Stream;
import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;
import storm.trident.testing.FixedBatchSpout;
public class WordCountTopology {

	/**
	 * Builds the word-count topology:
	 * spout -> SplitFunction -> groupBy("sub") -> Count -> ResultFunction.
	 *
	 * @return the assembled StormTopology, ready to submit
	 */
	public static StormTopology buildTopology() {
		TridentTopology topology = new TridentTopology();
		// Fixed test data: one "subjects" sentence per tuple, batch size 4.
		FixedBatchSpout sentenceSpout = new FixedBatchSpout(
				new Fields("subjects"),
				4,
				new Values("java java php ruby c++"),
				new Values("java python python python c++"),
				new Values("java java java java ruby"),
				new Values("c++ java ruby php java"));
		// Emit the test batches once only; do not cycle.
		sentenceSpout.setCycle(false);
		Stream sentences = topology.newStream("spout", sentenceSpout);
		sentences.shuffle()
				// Split each sentence into words, emitted as field "sub".
				.each(new Fields("subjects"), new SplitFunction(), new Fields("sub"))
				// Group by word — the Trident analogue of fields grouping.
				.groupBy(new Fields("sub"))
				// Aggregate each group with Count; output field is "count".
				.aggregate(new Count(), new Fields("count"))
				// Print word/count pairs; nothing is emitted further.
				.each(new Fields("sub", "count"), new ResultFunction(), new Fields())
				.parallelismHint(1);
		return topology.build();
	}

	public static void main(String[] args) throws InterruptedException, AlreadyAliveException, InvalidTopologyException {
		Config conf = new Config();
		conf.setNumWorkers(2);
		// Cap the number of un-acked batches in flight.
		conf.setMaxSpoutPending(20);
		if (args.length == 0) {
			// Local mode: run for 10 seconds, then shut down.
			LocalCluster local = new LocalCluster();
			local.submitTopology("trident-function", conf, buildTopology());
			Thread.sleep(10000);
			local.shutdown();
		} else {
			// First arg is the topology name on the real cluster.
			StormSubmitter.submitTopology(args[0], conf, buildTopology());
		}
	}
}