STORM入门之（TridentAPI,partition）

最新推荐文章于 2024-05-30 00:13:31 发布

Ginoy

最新推荐文章于 2024-05-30 00:13:31 发布

阅读量478

点赞数 1

分类专栏： [老达笔记]Storm [老达笔记]大数据笔记 Storm基础应用

本文链接：https://blog.csdn.net/yl3395017/article/details/78039587

版权

[老达笔记]大数据笔记同时被 3 个专栏收录

35 篇文章 0 订阅

订阅专栏

[老达笔记]Storm

20 篇文章 1 订阅

订阅专栏

Storm基础应用

19 篇文章 5 订阅

订阅专栏

partitionAggregate

partitionAggregate 会在一批 tuple 的每个分区上执行一个指定的功能操作。以下面这段代码为例：

mystream.partitionAggregate(new Fields("b"), new Sum(), new Fields("sum"))

假如输入流中包含有 “a”、“b” 两个域并且有以下几个 tuple 块：

Partition 0:
["a", 1]
["b", 2]

Partition 1:
["a", 3]
["c", 8]

Partition 2:
["e", 1]
["d", 9]
["d", 10]

经过上面的代码之后，输出就会变成带有一个名为 “sum” 的域的数据流，其中的 tuple 就是这样的：

Partition 0:
[3]

Partition 1:
[11]

Partition 2:
[20]

Topology

首先需要对数据流进行分区：使用 partitionBy 方法，按照指定的 field 进行分区。

package storm.topology;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.builtin.Count;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import storm.spout.FixedBatchSpout;
import storm.trident.Split;
import storm.trident.Statistics;
import storm.trident.WordAggregat;

/**
 * Local-mode demo topology: splits sentences into words, repartitions the
 * stream by word and aggregates every partition with {@link WordAggregat}.
 */
public class TridentAggreTopology {

    /** Field carrying the raw sentence emitted by the spout. */
    private static final String SENTENCE_FIELD = "sentence";

    /** Field carrying a single word produced by the split step. */
    private static final String WORD_FIELD = "word";

    /**
     * Builds the topology and submits it to an in-process cluster.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        TridentTopology trident = new TridentTopology();
        wireStream(trident, createSpout());
        StormTopology built = trident.build();

        LocalCluster localCluster = new LocalCluster();
        Config settings = new Config();
        settings.setDebug(true);
        localCluster.submitTopology("soc", settings, built);
    }

    /** Creates the spout that serves the three fixed sample sentences, one per batch. */
    private static FixedBatchSpout createSpout() {
        FixedBatchSpout sentences = new FixedBatchSpout(
                new Fields(SENTENCE_FIELD),
                1,
                new Values("the cow jumped "),
                new Values("cow jumped"),
                new Values("jumped"));
        // Cycling is left disabled, as in the original demo:
        // sentences.setCycle(true);
        return sentences;
    }

    /** Declares the stream: split into words, partition by word, aggregate per partition. */
    private static void wireStream(TridentTopology trident, FixedBatchSpout sentences) {
        Fields wordField = new Fields(WORD_FIELD);
        trident.newStream("batch-spout", sentences)
                // Split every sentence into individual words.
                .each(new Fields(SENTENCE_FIELD), new Split(), wordField)
                // Route equal words to the same partition.
                .partitionBy(wordField)
                // Aggregate the words of each partition into the "agg" field.
                .partitionAggregate(wordField, new WordAggregat(), new Fields("agg"));
    }

}

WordAggregat

package storm.trident;

import org.apache.storm.trident.operation.BaseAggregator;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.tuple.Values;

import java.util.HashMap;
import java.util.Map;

/**
 * Trident aggregator that counts how often each word occurs among the tuples
 * it is handed for one batch.
 *
 * <p>The counts are kept in the state map created by
 * {@link #init(Object, TridentCollector)}, so every batch starts from an empty
 * map and no state is shared between aggregator invocations.
 */
public class WordAggregat extends BaseAggregator<Map<String, Integer>> {

    /**
     * Legacy shared map, retained only so that existing references to
     * {@code WordAggregat.map} keep compiling.
     *
     * @deprecated the aggregator no longer reads or writes this field; the
     *             counts are emitted by {@link #complete(Map, TridentCollector)}.
     */
    @Deprecated
    public static Map<String, Integer> map = new HashMap<String, Integer>();

    /**
     * Creates the empty word-count state for a new batch.
     *
     * @param batchId   identifier of the batch being aggregated (unused)
     * @param collector collector for emitting tuples (unused here)
     * @return a fresh, empty word-to-count map
     */
    @Override
    public Map<String, Integer> init(Object batchId, TridentCollector collector) {
        return new HashMap<String, Integer>();
    }

    /**
     * Adds one occurrence of the tuple's word to the batch state.
     *
     * @param val       word-to-count state returned by {@code init}
     * @param tuple     input tuple; its first field is read as the word
     * @param collector collector for emitting tuples (unused here)
     */
    @Override
    public void aggregate(Map<String, Integer> val, TridentTuple tuple,
                          TridentCollector collector) {
        String word = tuple.getString(0);
        Integer seen = val.get(word);
        // The first occurrence counts as 1, not 0; later ones increment.
        val.put(word, seen == null ? 1 : seen + 1);
    }

    /**
     * Prints every word count of the batch and emits the whole map as a
     * single one-field tuple.
     *
     * @param val       word-to-count state accumulated by {@code aggregate}
     * @param collector collector that receives the resulting tuple
     */
    @Override
    public void complete(Map<String, Integer> val, TridentCollector collector) {
        for (Map.Entry<String, Integer> entry : val.entrySet()) {
            System.out.println("key= " + entry.getKey() + " and value= " + entry.getValue());
        }
        collector.emit(new Values(val));
    }
}