Storm 1.1.0:Trident + Window + HBase 集成实现词频统计 TopN

1. 温故而知新:使用词频统计实现 TopN。以下是用到的 Maven 依赖:

<!-- Maven dependencies for the TopN word-count topology: Storm core, the Storm HBase
     connector, the Hadoop client and ZooKeeper. -->
<dependencies>
        <!-- Storm runtime and Trident API, version 1.1.0. -->
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.1.0</version>
            <!-- NOTE(review): the provided scope below is disabled; presumably so the topology
                 can be run in local mode straight from the build. Confirm whether it should be
                 enabled when packaging a jar for a real cluster. -->
            <!--<scope>provided</scope>-->
        </dependency>
        <!-- Storm's HBase integration; kept on the same version as storm-core. -->
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-hbase</artifactId>
            <version>1.1.0</version>
        </dependency>
        <!-- Hadoop client 2.7.3 with its SLF4J binding and API excluded.
             NOTE(review): presumably excluded to avoid clashing with the logging stack that
             comes with storm-core - confirm. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- ZooKeeper 3.4.6 with the same two SLF4J artifacts excluded as for hadoop-client. -->
        <dependency>
            <groupId>org.apache.zookeeper</groupId>
            <artifactId>zookeeper</artifactId>
            <version>3.4.6</version>
            <exclusions>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>

2.代码实现

import org.apache.hadoop.hbase.client.Durability;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.hbase.trident.mapper.SimpleTridentHBaseMapper;
import org.apache.storm.hbase.trident.mapper.TridentHBaseMapper;
import org.apache.storm.hbase.trident.state.HBaseState;
import org.apache.storm.hbase.trident.state.HBaseStateFactory;
import org.apache.storm.hbase.trident.state.HBaseUpdater;
import org.apache.storm.topology.base.BaseWindowedBolt;
import org.apache.storm.trident.TridentTopology;
import org.apache.storm.trident.operation.BaseAggregator;
import org.apache.storm.trident.operation.BaseFunction;
import org.apache.storm.trident.operation.FlatMapFunction;
import org.apache.storm.trident.operation.TridentCollector;
import org.apache.storm.trident.state.StateFactory;
import org.apache.storm.trident.testing.FixedBatchSpout;
import org.apache.storm.trident.tuple.TridentTuple;
import org.apache.storm.trident.windowing.config.SlidingDurationWindow;
import org.apache.storm.trident.windowing.config.WindowConfig;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.util.*;
import java.util.concurrent.TimeUnit;

/**
 * Trident topology that computes the most frequent words over a sliding time window and
 * persists the ranking to HBase.
 *
 * <p>Pipeline: a cycling {@link FixedBatchSpout} emits sentences, {@link SplitFunction} splits
 * them into words, {@link WordAggregator} counts the words of each window into a map,
 * {@link TopNFunction} turns that map into ranked {@code (rank, word, count)} tuples, and the
 * tuples are written to the HBase table {@code Top5Count} (column family {@code result}),
 * keyed by rank.
 */
public class TopNTopology {

    /**
     * Emits the {@code n} most frequent words of a word-count map as
     * {@code (rank, word, count)} tuples, all three values as strings.
     */
    private static class TopNFunction extends BaseFunction {
        /** Maximum number of ranked entries emitted per incoming map. */
        private final int topN;

        /**
         * @param n maximum number of entries to emit; values below 1 emit nothing
         */
        public TopNFunction(int n) {
            this.topN = n;
        }

        /**
         * Ranks the word counts found in field 0 of {@code tuple} and emits the top entries.
         *
         * @param tuple     tuple whose first value is a {@code Map<String, Long>} of word to count
         * @param collector receives one {@code (rank, word, count)} tuple per ranked word
         */
        @Override
        public void execute(TridentTuple tuple, TridentCollector collector) {
            @SuppressWarnings("unchecked") // WordAggregator always emits a Map<String, Long>
            Map<String, Long> counts = (Map<String, Long>) tuple.get(0);
            List<Map.Entry<String, Long>> ranked = new ArrayList<>(counts.entrySet());
            Collections.sort(ranked, new Comparator<Map.Entry<String, Long>>() {
                @Override
                public int compare(Map.Entry<String, Long> left, Map.Entry<String, Long> right) {
                    // Highest count first.
                    int byCount = right.getValue().compareTo(left.getValue());
                    if (byCount != 0) {
                        return byCount;
                    }
                    // Break ties alphabetically so equal counts do not get a rank that depends
                    // on hash-map iteration order (rank is the HBase row key).
                    return left.getKey().compareTo(right.getKey());
                }
            });

            int limit = Math.min(topN, ranked.size());
            for (int rank = 1; rank <= limit; rank++) {
                Map.Entry<String, Long> entry = ranked.get(rank - 1);
                collector.emit(new Values(String.valueOf(rank), entry.getKey(), String.valueOf(entry.getValue())));
                System.out.println("Sending:  " + rank + "  " + entry.getKey() + ": " + entry.getValue());
            }
            System.out.println("----------------done----------------------");
        }
    }

    /** Splits the {@code sentence} field on single spaces into one tuple per word. */
    private static class SplitFunction implements FlatMapFunction {
        /**
         * @param input tuple carrying a {@code sentence} string field
         * @return one single-value tuple per non-empty token; empty if the sentence is null
         */
        @Override
        public Iterable<Values> execute(TridentTuple input) {
            List<Values> words = new ArrayList<>();
            String sentence = input.getStringByField("sentence");
            if (sentence == null) {
                return words;
            }
            for (String token : sentence.split(" ")) {
                // Leading or repeated spaces yield empty tokens; they are not words.
                if (!token.isEmpty()) {
                    words.add(new Values(token));
                }
            }
            return words;
        }
    }

    /** Aggregates the {@code word} field of the incoming tuples into a word-to-count map. */
    private static class WordAggregator extends BaseAggregator<HashMap<String, Long>> {

        /** Starts every aggregation with an empty count map. */
        @Override
        public HashMap<String, Long> init(Object batchId, TridentCollector collector) {
            return new HashMap<>();
        }

        /** Increments the count of the tuple's {@code word} in {@code val}. */
        @Override
        public void aggregate(HashMap<String, Long> val, TridentTuple tuple, TridentCollector collector) {
            String word = tuple.getStringByField("word");
            Long previous = val.get(word);
            val.put(word, previous == null ? 1L : previous + 1L);
        }

        /** Emits the finished count map as a single-value tuple. */
        @Override
        public void complete(HashMap<String, Long> val, TridentCollector collector) {
            collector.emit(new Values(val));
        }
    }

    /**
     * Builds the topology and submits it.
     *
     * @param args with no arguments the topology runs in an in-process {@link LocalCluster};
     *             otherwise {@code args[0]} is the topology name used for cluster submission
     */
    public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        // Test spout: at most 3 tuples per batch, cycling through the sentences forever.
        FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3,
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat"),
                new Values("to be or not to be the person"));
        spout.setCycle(true);

        // Row key = rank; columns result:word and result:count.
        TridentHBaseMapper tridentHBaseMapper = new SimpleTridentHBaseMapper()
                .withColumnFamily("result")
                .withColumnFields(new Fields("word", "count"))
                .withRowKeyField("rank");

        HBaseState.Options options = new HBaseState.Options()
                .withConfigKey("hbase")
                .withDurability(Durability.SYNC_WAL)
                .withMapper(tridentHBaseMapper)
                .withTableName("Top5Count");
        StateFactory hBaseStateFactory = new HBaseStateFactory(options);

        // Sliding window: 10 seconds long, advancing every 5 seconds.
        WindowConfig durationWindow = SlidingDurationWindow.of(BaseWindowedBolt.Duration.seconds(10), BaseWindowedBolt.Duration.seconds(5));

        TridentTopology topology = new TridentTopology();
        topology.newStream("fixedSpout", spout)
                .flatMap(new SplitFunction(), new Fields("word"))
                .window(durationWindow, new Fields("word"), new WordAggregator(), new Fields("wordcount"))
                .each(new Fields("wordcount"), new TopNFunction(5), new Fields("rank", "word", "count"))
                .partitionPersist(hBaseStateFactory, new Fields("rank", "word", "count"), new HBaseUpdater(), new Fields());

        Config conf = new Config();
        // Same key as withConfigKey("hbase") above; the map is intentionally empty here.
        // NOTE(review): presumably HBase settings then come from hbase-site.xml on the
        // classpath - confirm.
        conf.put("hbase", new HashMap<String, Object>());
        if (args.length == 0) {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("Top5Topology", conf, topology.build());
        } else {
            conf.setNumWorkers(3);
            StormSubmitter.submitTopologyWithProgressBar(args[0], conf, topology.build());
        }
    }
}
  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值