局部聚合与全局聚合

1.说明

  在局部聚合的类中必须有一个内部类,用于存储状态值,即聚合过程中的中间结果

  GroupBy+局部聚合,不需要过多的跨网络传输

  GroupBy+全局聚合,需要大量的网络传输

 

一:局部聚合

1.需求

  另一个流,基于分钟的订单金额总数,局部聚合

 

2.驱动类

 1 package com.jun.trident;
 2 
 3 import backtype.storm.Config;
 4 import backtype.storm.LocalCluster;
 5 import backtype.storm.StormSubmitter;
 6 import backtype.storm.generated.AlreadyAliveException;
 7 import backtype.storm.generated.InvalidTopologyException;
 8 import backtype.storm.tuple.Fields;
 9 import backtype.storm.tuple.Values;
10 import storm.trident.Stream;
11 import storm.trident.TridentState;
12 import storm.trident.TridentTopology;
13 import storm.trident.operation.Function;
14 import storm.trident.operation.TridentCollector;
15 import storm.trident.operation.TridentOperationContext;
16 import storm.trident.operation.builtin.Count;
17 import storm.trident.operation.builtin.Sum;
18 import storm.trident.testing.FixedBatchSpout;
19 import storm.trident.testing.MemoryMapState;
20 import storm.trident.tuple.TridentTuple;
21 
22 import java.util.Map;
23 
24 public class TridentDemo {
25     public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
26         TridentTopology tridentTopology=new TridentTopology();
27         //模拟数据
28         Fields field=new Fields("log","flag");
29         FixedBatchSpout spout=new FixedBatchSpout(field,5,
30             new Values("168.214.187.214 - - [1481953616092] \"GET /view.php HTTP/1.1\" 200 0 \"http://cn.bing.com/search?q=spark mllib\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","A"),
31             new Values("168.187.202.202 - - [1481953537038] \"GET /IBEIfeng.gif?order_id=1063&orderTime=1481953537038&memberId=4000012340500607&productInfos=10005-2099.48-B-1|10004-1886.62-A-2|10001-961.99-A-1&orderAmt=6834.70 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2;Tident/6.0)\" \"-\"","A"),
32             new Values("61.30.167.187 - - [1481953539039] \"GET /IBEIfeng.gif?order_id=1064&orderTime=1481953539039&memberId=4000930409959999&productInfos=10007-3329.13-B-1|10009-2607.71-B-1|10002-390.62-A-1|10006-411.00-B-2&orderAmt=7149.46 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","A"),
33             new Values("30.29.132.190 - - [1481953544042] \"GET /IBEIfeng.gif?order_id=1065&orderTime=1481953544043&memberId=1234568970080798&productInfos=10005-2099.48-B-1|10001-3242.40-C-2|10006-411.00-B-1&orderAmt=8995.28 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 7_)_3 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B511 Safari/9537.53\" \"-\"","B"),
34             new Values("222.190.187.201 - - [1481953578068] \"GET /IBEIfeng.gif?order_id=1066&orderTime=1481953578068&memberId=3488586887970809&productInfos=10005-2099.48-B-1|10001-2774.16-C-2&orderAmt=7647.80 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","B"),
35             new Values("72.202.43.53 - - [1481953579069] \"GET /IBEIfeng.gif?order_id=1067&orderTime=1481953579069&memberId=2084859896989877&productInfos=10007-3329.13-B-1|10001-961.99-A-2&orderAmt=5253.10 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","B")
36         );
37         //多次循环
38         spout.setCycle(true);
39         //流处理
40         Stream stream=tridentTopology.newStream("orderAnalyse",spout)
41                 //过滤
42             .each(new Fields("log"),new ValidLogFilter())
43                 //解析
44             .each(new Fields("log"), new LogParserFunction(),new Fields("orderId","orderTime","orderAmtStr","memberId"))
45                 //投影
46             .project(new Fields("orderId","orderTime","orderAmtStr","memberId"))
47                 //时间解析
48             .each(new Fields("orderTime"),new DateTransFormerFunction(),new Fields("day","hour","minter"))
49          ;
50         //分流
51         //1.基于minter统计订单数量,分组统计
52         TridentState state=stream.groupBy(new Fields("minter"))
53                 //全局聚合,使用内存存储状态信息
54                 .persistentAggregate(new MemoryMapState.Factory(),new Count(),new Fields("orderNumByMinter"));
55 //        state.newValuesStream().each(new Fields("minter","orderNumByMinter"),new PrintFilter());
56 
57         //2.另一个流,基于分钟的订单金额,局部聚合
58         Stream partitionStream=stream.each(new Fields("orderAmtStr"),new TransforAmtToDoubleFunction(),new Fields("orderAmt"))
59             .groupBy(new Fields("minter"))
60                     //局部聚合
61                 .chainedAgg()    //聚合链
62             .partitionAggregate(new Fields("orderAmt"),new LocalSum(),new Fields("orderAmtSumOfLocal"))
63                 .chainEnd();      //聚合链
64         partitionStream.each(new Fields("minter","orderAmtSumOfLocal"),new PrintFilter());
65         
66         //提交
67         Config config=new Config();
68         if(args==null || args.length<=0){
69             LocalCluster localCluster=new LocalCluster();
70             localCluster.submitTopology("tridentDemo",config,tridentTopology.build());
71         }else {
72             config.setNumWorkers(2);
73             StormSubmitter.submitTopology(args[0],config,tridentTopology.build());
74         }
75     }
76 }

 

3.金额从字符串转为double类型的方法类

 1 package com.jun.trident;
 2 
 3 import backtype.storm.tuple.Values;
 4 import org.slf4j.Logger;
 5 import org.slf4j.LoggerFactory;
 6 import storm.trident.operation.Function;
 7 import storm.trident.operation.TridentCollector;
 8 import storm.trident.operation.TridentOperationContext;
 9 import storm.trident.tuple.TridentTuple;
10 
11 import java.util.Map;
12 
13 public class TransforAmtToDoubleFunction implements Function {
14     private static final Logger logger= LoggerFactory.getLogger(TransforAmtToDoubleFunction.class);
15     @Override
16     public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
17         String orderAmtStr=tridentTuple.getStringByField("orderAmtStr");
18         try{
19             Double orderAmt=Double.parseDouble(orderAmtStr);
20             tridentCollector.emit(new Values(orderAmt));
21         }catch (Exception e){
22             logger.error("金额转换错误:"+orderAmtStr);
23         }
24 
25     }
26 
27     @Override
28     public void prepare(Map map, TridentOperationContext tridentOperationContext) {
29 
30     }
31 
32     @Override
33     public void cleanup() {
34 
35     }
36 }

 

4.局部聚合的类

 1 package com.jun.trident;
 2 
 3 import backtype.storm.tuple.Values;
 4 import storm.trident.operation.Aggregator;
 5 import storm.trident.operation.TridentCollector;
 6 import storm.trident.operation.TridentOperationContext;
 7 import storm.trident.tuple.TridentTuple;
 8 
 9 import java.util.Map;
10 
11 
12 public class LocalSum implements Aggregator<LocalSum.InnerState> {
13     //内部状态类,用于暂存累计的结果状态
14     public static class InnerState{
15         public double amtSum=0.0;
16     }
17     @Override
18     public InnerState init(Object o, TridentCollector tridentCollector) {
19         InnerState innerState=new InnerState();
20         innerState.amtSum=0.0;
21         return innerState;
22     }
23 
24     //執行循环累计
25     @Override
26     public void aggregate(InnerState preState, TridentTuple tridentTuple, TridentCollector tridentCollector) {
27         Double orderAmt=tridentTuple.getDoubleByField("orderAmt");
28         //累计
29         double preSum=preState.amtSum;
30         double newAmt=preSum+orderAmt;
31         //更新
32        preState.amtSum=newAmt;
33     }
34 
35     @Override
36     public void complete(InnerState innerState, TridentCollector tridentCollector) {
37         //最终结果
38        tridentCollector.emit(new Values(innerState.amtSum));
39     }
40 
41     @Override
42     public void prepare(Map map, TridentOperationContext tridentOperationContext) {
43 
44     }
45 
46     @Override
47     public void cleanup() {
48 
49     }
50 
51 
52 
53 }

 

5.效果

  

 

二:全局聚合

1.说明

  在上一篇文档的程序中已经使用过全局聚合了,这里再和局部聚合放在一起重新说明一次

 

2.驱动类

 1 package com.jun.trident;
 2 
 3 import backtype.storm.Config;
 4 import backtype.storm.LocalCluster;
 5 import backtype.storm.StormSubmitter;
 6 import backtype.storm.generated.AlreadyAliveException;
 7 import backtype.storm.generated.InvalidTopologyException;
 8 import backtype.storm.tuple.Fields;
 9 import backtype.storm.tuple.Values;
10 import storm.trident.Stream;
11 import storm.trident.TridentState;
12 import storm.trident.TridentTopology;
13 import storm.trident.operation.Function;
14 import storm.trident.operation.TridentCollector;
15 import storm.trident.operation.TridentOperationContext;
16 import storm.trident.operation.builtin.Count;
17 import storm.trident.operation.builtin.Sum;
18 import storm.trident.testing.FixedBatchSpout;
19 import storm.trident.testing.MemoryMapState;
20 import storm.trident.tuple.TridentTuple;
21 
22 import java.util.Map;
23 
24 public class TridentDemo {
25     public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
26         TridentTopology tridentTopology=new TridentTopology();
27         //模拟数据
28         Fields field=new Fields("log","flag");
29         FixedBatchSpout spout=new FixedBatchSpout(field,5,
30             new Values("168.214.187.214 - - [1481953616092] \"GET /view.php HTTP/1.1\" 200 0 \"http://cn.bing.com/search?q=spark mllib\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","A"),
31             new Values("168.187.202.202 - - [1481953537038] \"GET /IBEIfeng.gif?order_id=1063&orderTime=1481953537038&memberId=4000012340500607&productInfos=10005-2099.48-B-1|10004-1886.62-A-2|10001-961.99-A-1&orderAmt=6834.70 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2;Tident/6.0)\" \"-\"","A"),
32             new Values("61.30.167.187 - - [1481953539039] \"GET /IBEIfeng.gif?order_id=1064&orderTime=1481953539039&memberId=4000930409959999&productInfos=10007-3329.13-B-1|10009-2607.71-B-1|10002-390.62-A-1|10006-411.00-B-2&orderAmt=7149.46 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","A"),
33             new Values("30.29.132.190 - - [1481953544042] \"GET /IBEIfeng.gif?order_id=1065&orderTime=1481953544043&memberId=1234568970080798&productInfos=10005-2099.48-B-1|10001-3242.40-C-2|10006-411.00-B-1&orderAmt=8995.28 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (iPhone; CPU iPhone OS 7_)_3 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B511 Safari/9537.53\" \"-\"","B"),
34             new Values("222.190.187.201 - - [1481953578068] \"GET /IBEIfeng.gif?order_id=1066&orderTime=1481953578068&memberId=3488586887970809&productInfos=10005-2099.48-B-1|10001-2774.16-C-2&orderAmt=7647.80 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1\" \"-\"","B"),
35             new Values("72.202.43.53 - - [1481953579069] \"GET /IBEIfeng.gif?order_id=1067&orderTime=1481953579069&memberId=2084859896989877&productInfos=10007-3329.13-B-1|10001-961.99-A-2&orderAmt=5253.10 HTTP/1.1\" 200 0 \"-\" \"Mozilla/5.0 (Linux; Android 4.2.1; Galaxy Nexus Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19\" \"-\"","B")
36         );
37         //多次循环
38         spout.setCycle(true);
39         //流处理
40         Stream stream=tridentTopology.newStream("orderAnalyse",spout)
41                 //过滤
42             .each(new Fields("log"),new ValidLogFilter())
43                 //解析
44             .each(new Fields("log"), new LogParserFunction(),new Fields("orderId","orderTime","orderAmtStr","memberId"))
45                 //投影
46             .project(new Fields("orderId","orderTime","orderAmtStr","memberId"))
47                 //时间解析
48             .each(new Fields("orderTime"),new DateTransFormerFunction(),new Fields("day","hour","minter"))
49          ;
50         //分流
51         //1.基于minter统计订单数量,分组统计
52         TridentState state=stream.groupBy(new Fields("minter"))
53                 //全局聚合,使用内存存储状态信息
54                 .persistentAggregate(new MemoryMapState.Factory(),new Count(),new Fields("orderNumByMinter"));
55 //        state.newValuesStream().each(new Fields("minter","orderNumByMinter"),new PrintFilter());
56 
57         //2.另一个流,基于分钟的订单金额,局部聚合
58         Stream partitionStream=stream.each(new Fields("orderAmtStr"),new TransforAmtToDoubleFunction(),new Fields("orderAmt"))
59             .groupBy(new Fields("minter"))
60                     //局部聚合
61                 .chainedAgg()    //聚合链
62             .partitionAggregate(new Fields("orderAmt"),new LocalSum(),new Fields("orderAmtSumOfLocal"))
63                 .chainEnd();      //聚合链
64 //        partitionStream.each(new Fields("minter","orderAmtSumOfLocal"),new PrintFilter());
65         //做一次全局聚合
66         TridentState partitionState=partitionStream.groupBy(new Fields("minter"))
67                 //全局聚合
68                 .persistentAggregate(new MemoryMapState.Factory(),new Fields("orderAmtSumOfLocal"),new Sum(),new Fields("totalOrderAmt"));
69         partitionState.newValuesStream().each(new Fields("minter","totalOrderAmt"),new PrintFilter());
70 
71         //提交
72         Config config=new Config();
73         if(args==null || args.length<=0){
74             LocalCluster localCluster=new LocalCluster();
75             localCluster.submitTopology("tridentDemo",config,tridentTopology.build());
76         }else {
77             config.setNumWorkers(2);
78             StormSubmitter.submitTopology(args[0],config,tridentTopology.build());
79         }
80     }
81 }

 

3.效果

  

 

转载于:https://www.cnblogs.com/juncaoit/p/9164176.html

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值