Storm Trident 和 HBase 实现热门搜索

10 篇文章 0 订阅
8 篇文章 0 订阅

统计APP中,全量用户的搜索统计,并且列出TopN
主要流程如下:
1. APP 端的搜索数据推送到 Kafka 中
2. Storm 与 Kafka 之间有原生接口 OpaqueTridentKafkaSpout
3. Storm 中应用 SlidingDurationWindow 进行窗口统计
4. 统计结果通过 TridentHBaseMapper 写入到 HBase 当中

code:


    // Class-level SLF4J logger shared by this topology and its nested classes.
    static Logger logger= LoggerFactory.getLogger(TopNTopology.class);

    /**
     * Trident function that takes the per-window word-count map (field "wordCount")
     * and emits the top-N entries as ("rank", "key", "value") string tuples.
     *
     * BUG FIXES vs. the original:
     *  - the original built an empty HashMap/entryList and never read the tuple,
     *    so nothing was ever emitted;
     *  - the counter {@code i} was never incremented, so the TopN cutoff never fired;
     *  - the loop bound {@code j > 0} skipped index 0.
     */
    private static class TopNFuncation extends BaseFunction{

        // How many top entries to emit per window.
        private final int topN;

        public TopNFuncation(int TopN){
            this.topN = TopN;
        }

        public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
            // Field 0 is the HashMap<String,Long> emitted by the window aggregator.
            @SuppressWarnings("unchecked")
            Map<String, Long> wordCounts = (Map<String, Long>) tridentTuple.getValue(0);
            if (wordCounts == null || wordCounts.isEmpty()) {
                return; // nothing to rank in this window
            }
            List<Map.Entry<String, Long>> entryList =
                    new ArrayList<Map.Entry<String, Long>>(wordCounts.entrySet());
            // Sort descending by count so the hottest words come first.
            Collections.sort(entryList, new Comparator<Map.Entry<String, Long>>() {
                public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
                    return o2.getValue().compareTo(o1.getValue());
                }
            });
            int limit = Math.min(topN, entryList.size());
            for (int rank = 1; rank <= limit; rank++) {
                Map.Entry<String, Long> entry = entryList.get(rank - 1);
                logger.info("the top is " + rank
                        + " the key is " + entry.getKey()
                        + " the values is " + entry.getValue());
                // rank becomes the HBase row key downstream ("rank" field).
                tridentCollector.emit(new Values(String.valueOf(rank),
                        entry.getKey(), String.valueOf(entry.getValue())));
            }
        }
    }


    /**
     * Splits the incoming sentence (tuple field 0) on single spaces and emits
     * one output tuple per token.
     */
    private static class SplitFunction implements FlatMapFunction {

        public Iterable<Values> execute(TridentTuple tridentTuple) {
            String sentence = tridentTuple.getString(0);
            List<Values> words = new ArrayList<Values>();
            for (String token : sentence.split(" ")) {
                words.add(new Values(token));
            }
            return words;
        }
    }

    /**
     * Window aggregator: counts occurrences of the "word" field within one window
     * and emits the full word->count map when the window completes.
     *
     * BUG FIX vs. the original: the original never added 1 for the current
     * occurrence ({@code count += map.get(word)} starting from 0), so every
     * word's count stayed at 0 forever.
     */
    private static class WordAggreator extends BaseAggregator<HashMap<String,Long>>{

        public HashMap<String, Long> init(Object o, TridentCollector tridentCollector) {
            // Fresh, empty count map for each window.
            return new HashMap<String, Long>();
        }

        public void aggregate(HashMap<String, Long> stringLongHashMap, TridentTuple tridentTuple, TridentCollector tridentCollector) {
            String word = tridentTuple.getStringByField("word");
            // Increment this word's count (0 if unseen in this window).
            Long previous = stringLongHashMap.get(word);
            stringLongHashMap.put(word, (previous == null ? 0L : previous) + 1L);
        }

        public void complete(HashMap<String, Long> stringLongHashMap, TridentCollector tridentCollector) {
            // Emit the whole map as a single tuple for downstream TopN ranking.
            tridentCollector.emit(new Values(stringLongHashMap));
        }
    }

    /**
     * Wires the topology: Kafka spout -> split -> 10s sliding window word count
     * -> TopN ranking -> HBase ("TopHot" table, "result" column family).
     *
     * Usage: [topN] [topologyName]. With fewer than two args the topology runs
     * in a LocalCluster; with two it is submitted to the cluster as args[1].
     *
     * BUG FIXES vs. the original:
     *  - args[0] was read unconditionally while local mode was only detected
     *    afterwards via args.length == 0, so local runs always crashed with
     *    ArrayIndexOutOfBoundsException; topN now defaults to 10 when absent.
     *  - a single-arg invocation would crash reading args[1]; local mode now
     *    covers args.length < 2.
     *  - three identical catch blocks collapsed into one multi-catch that logs
     *    via the class logger instead of printStackTrace().
     */
    public static void main(String []args) {
        // TopN size from the first CLI arg, defaulting to 10 for local runs.
        int topN = args.length > 0 ? Integer.parseInt(args[0]) : 10;

        TridentTopology tridentTopology = new TridentTopology();
        String zks = "10.1.69.11:2181,10.1.69.12:2181,10.1.69.13:2181";
        String topic = "stormKafka";
        String zkRoot = "/storm";
        String id = "stormKafka";
        BrokerHosts brokerHosts = new ZkHosts(zks);

        // Kafka spout: consume raw strings from the "stormKafka" topic.
        TridentKafkaConfig spoutConf = new TridentKafkaConfig(brokerHosts, topic);
        spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
        OpaqueTridentKafkaSpout spout = new OpaqueTridentKafkaSpout(spoutConf);

        // HBase sink: row key = rank, columns word/count in family "result".
        TridentHBaseMapper tridentHBaseMapper = new SimpleTridentHBaseMapper()
                .withColumnFamily("result")
                .withColumnFields(new Fields("word", "count"))
                .withRowKeyField("rank");

        HBaseState.Options options = new HBaseState.Options()
                .withConfigKey("hbase")
                .withDurability(Durability.SYNC_WAL)
                .withMapper(tridentHBaseMapper)
                .withTableName("TopHot");
        StateFactory hBaseStateFactory = new HBaseStateFactory(options);

        // 10s window sliding every 10s (i.e. back-to-back tumbling windows).
        WindowConfig durationWindow = SlidingDurationWindow.of(BaseWindowedBolt.Duration.seconds(10), BaseWindowedBolt.Duration.seconds(10));

        tridentTopology.newStream("spout", spout)
                .flatMap(new SplitFunction(), new Fields("word"))
                .window(durationWindow, new Fields("word"), new WordAggreator(), new Fields("wordCount"))
                .each(new Fields("wordCount"), new TopNFuncation(topN), new Fields("rank", "key", "value"))
                .partitionPersist(hBaseStateFactory, new Fields("rank", "key", "value"), new HBaseUpdater(), new Fields());

        Config conf = new Config();
        Map<String, Object> hbConf = new HashMap<String, Object>();
        conf.put("hbase", hbConf);
        if (args.length < 2) {
            // No topology name given: run in-process for development.
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("TopNTopology", conf, tridentTopology.build());
        } else {
            conf.setNumWorkers(3);
            try {
                StormSubmitter.submitTopologyWithProgressBar(args[1], conf, tridentTopology.build());
            } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
                logger.error("Failed to submit topology " + args[1], e);
            }
        }
    }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值