Storm之网站实时统计

package com.uplooking.bigdata.storm.test;

import org.apache.storm.Config;
import org.apache.storm.Constants;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.shade.org.apache.commons.io.FileUtils;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.io.File;
import java.io.IOException;
import java.util.*;

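/**
 * Real-time statistics of a website's PV and UV.
 * PV
 *  page view
 *  the total number of page requests the site has received
 * UV
 *  unique visitor
 *  the number of distinct visitors (identified here by IP)
 *
 *  Exercises:
 *  1. Find the 3 IPs with the highest number of visits, together with their visit counts
 *  2. Building on that, compute the PV and UV values for each time window [1-hour interval]
 *
 */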
public class PVAndUVSumTopology {
    static class PUSpout extends BaseRichSpout {
        private Map conf;
        private TopologyContext context;
        private SpoutOutputCollector collector;
        public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
            this.conf = conf;
            this.context = context;
            this.collector = collector;
        }

        //监听一个目录新文件的产生
        public void nextTuple() {
            Collection<File> files = FileUtils.listFiles(new File("E:/test/storm"), new String[]{"log"}, true);
            try {
                for (File file : files) {
                    List<String> lines = FileUtils.readLines(file, "UTF-8");
                    for (String line : lines) {
                        collector.emit(new Values(line));
                    }
                    //处理完毕当前文件之后,对该文件设置标识
                    FileUtils.moveFile(file, new File(file.getAbsolutePath() + "." + System.currentTimeMillis()));
                }
            } catch (IOException e) {
                //啥事不干
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("line"));
        }
    }

    static class SplitBolt extends BaseRichBolt {
        private Map conf;
        private TopologyContext context;
        private OutputCollector collector;

        public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
            this.conf = conf;
            this.context = context;
            this.collector = collector;
        }

        @Override
        public void execute(Tuple tuple) {
            String line = tuple.getStringByField("line");
            String[] splits = line.split("##");
            if(splits == null || splits.length < 1) {
                return;
            }
            //因为我们要进行的统计之和IP有关系,所以没有必要将其所有的数据都发送到下游
            //以免造成网络拥堵
            collector.emit(new Values(splits[0]));
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("ip"));
        }
    }

    //统计PV和UV值
    static class PUBolt extends BaseRichBolt {
        private Map conf;
        private TopologyContext context;
        private OutputCollector collector;

        public void prepare(Map conf, TopologyContext context, OutputCollector collector) {
            this.conf = conf;
            this.context = context;
            this.collector = collector;
        }

        int pv = 0;
        Set<String> uvSet = new HashSet<String>();
        @Override
        public void execute(Tuple tuple) {
            if(!tuple.getSourceComponent().equalsIgnoreCase(Constants.SYSTEM_COMPONENT_ID)) {
                ++pv;//计算截止到目前为止的PV值
                String ip = tuple.getStringByField("ip");
                uvSet.add(ip);
            } else {
                System.out.println("==============统计结果start===============");
                System.out.println("PV值:" + pv);
                System.out.println("UV值:" + uvSet.size());
                System.out.println("==============统计结果end============");
            }
        }

        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {

        }

        @Override
        public Map<String, Object> getComponentConfiguration() {
            Map<String, Object> conf = new HashMap<>();
            conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10);
            return conf;
        }
    }

    public static void main(String[] args) {

        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("puSpout", new PUSpout());
        builder.setBolt("splitBolt", new SplitBolt()).shuffleGrouping("puSpout");
        builder.setBolt("puBolt", new PUBolt()).shuffleGrouping("splitBolt");

        StormTopology stormTopology = builder.createTopology();
        LocalCluster cluster = new LocalCluster();
        String topologyName = PVAndUVSumTopology.class.getSimpleName();
        Config conf = new Config();

        cluster.submitTopology(topologyName, conf, stormTopology);
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值