Storm Trident+DRPC实例

需求

使用Trident统计每个用户(以IP区分)使用过的浏览器数量
通过DRPC查看结果

开发过程

实现IBatchSpout批量读取日志文件

/**
 * Trident batch spout that turns the lines of every {@code *.log} file found
 * under the {@code logs/} directory (searched recursively) into one-field tuples.
 *
 * <p>A batch is built the first time its id is requested and kept in
 * {@link #batches} until {@link #ack(long)} is called for that id, so a
 * re-emit of the same batch id sends exactly the same tuples again.
 *
 * <p>After a file has been read successfully it is renamed to
 * {@code <name>.done.<timestamp>} so that it is not picked up by a later batch.
 */
public class MyBatchSpout implements IBatchSpout {
    Fields fields;
    HashMap<Long, List<List<Object>>> batches = new HashMap<Long, List<List<Object>>>();

    /**
     * @param fields the output fields declared by this spout; every emitted
     *               tuple carries a single value (one log line)
     */
    public MyBatchSpout(Fields fields) {
        this.fields = fields;
    }

    @Override
    public void open(Map map, TopologyContext topologyContext) {
        // Nothing to initialise: files are discovered lazily in emitBatch.
    }

    /**
     * Emits the tuples of the given batch, building the batch from the pending
     * log files if it has not been built yet.
     *
     * @param batchId          id of the batch to emit
     * @param tridentCollector collector that receives one tuple per log line
     */
    @Override
    public void emitBatch(long batchId, TridentCollector tridentCollector) {
        List<List<Object>> batch = this.batches.get(Long.valueOf(batchId));
        if (null == batch) {
            batch = readPendingLogFiles();
            // Cache the batch once, after all files were scanned (even when it is
            // empty), so that a re-emit of this id replays the same content.
            this.batches.put(Long.valueOf(batchId), batch);
        }
        for (List<Object> list : batch) {
            tridentCollector.emit(list);
        }
    }

    /**
     * Reads every pending {@code *.log} file below {@code logs/} and marks each
     * successfully read file as done by renaming it.
     *
     * @return one single-value tuple per line read; empty if the directory is
     *         missing or contains no pending files
     */
    private List<List<Object>> readPendingLogFiles() {
        List<List<Object>> batch = new ArrayList<List<Object>>();
        // Directory that is scanned for log files.
        String dataDir = "logs/";
        File dir = new File(dataDir);
        if (!dir.isDirectory()) {
            // Nothing to read yet; avoid the exception listFiles throws for a
            // path that is not a directory.
            return batch;
        }
        Collection<File> listFiles = FileUtils.listFiles(dir, new String[]{"log"}, true);
        for (File f : listFiles) {
            List<String> readLines;
            try {
                readLines = FileUtils.readLines(f);
            } catch (IOException e) {
                e.printStackTrace();
                // The file could not be read: do NOT mark it as done, otherwise
                // its content would be lost. It stays in place for a later batch.
                continue;
            }
            for (String line : readLines) {
                batch.add(new Values(line));
            }
            // The file has been processed: append ".done" and a timestamp to its
            // name so that it is not read again.
            try {
                File srcFile = f.getAbsoluteFile();
                File destFile = new File(srcFile + ".done." + System.currentTimeMillis());
                FileUtils.moveFile(srcFile, destFile);
            } catch (Exception e) {
                // NOTE(review): if the rename fails the file keeps its .log name
                // and will be read again by a later batch.
                e.printStackTrace();
            }
        }
        return batch;
    }

    /**
     * Drops the cached tuples of a batch once it has been acknowledged.
     *
     * @param batchId id of the acknowledged batch
     */
    @Override
    public void ack(long batchId) {
        this.batches.remove(Long.valueOf(batchId));
    }

    @Override
    public void close() {
        // No resources are held between calls.
    }

    /**
     * @return a component configuration with the maximum task parallelism set to 1
     */
    @Override
    public Map<String, Object> getComponentConfiguration() {
        Config conf = new Config();
        conf.setMaxTaskParallelism(1);
        return conf;
    }

    /**
     * @return the output fields passed to the constructor
     */
    @Override
    public Fields getOutputFields() {
        return this.fields;
    }
}

实现BaseFunction,从每行日志中分割出IP地址和浏览器信息,合并成一个新字段IP_browser

/**
 * Trident function that extracts the client IP and the browser string from one
 * log line and emits them joined as a single value {@code <IP>_<browser>}.
 *
 * <p>The line is split on double quotes: the IP is the first space-separated
 * token of the first segment and the browser is the sixth segment. Lines that
 * do not have that shape, and lines whose browser is {@code "-"}, emit nothing.
 */
public static class Split extends BaseFunction {
    /** Index of the browser segment after splitting the line on double quotes. */
    private static final int BROWSER_SEGMENT = 5;

    /**
     * @param tridentTuple     input tuple whose first value is the raw log line
     * @param tridentCollector collector that receives the {@code <IP>_<browser>} value
     */
    @Override
    public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
        String line = tridentTuple.getString(0);
        if (line == null) {
            return;
        }
        String[] segments = line.split("\"");
        if (segments.length <= BROWSER_SEGMENT) {
            // Malformed or truncated line: skip it instead of failing the whole
            // batch with an ArrayIndexOutOfBoundsException.
            return;
        }
        String[] leadingTokens = segments[0].split(" ");
        if (leadingTokens.length == 0 || leadingTokens[0].isEmpty()) {
            // No IP in front of the first quote: nothing meaningful to emit.
            return;
        }
        String ip = leadingTokens[0];
        String browser = segments[BROWSER_SEGMENT];
        if (!browser.equals("-")) {
            tridentCollector.emit(new Values(ip + "_" + browser));
        }
    }
}

实现BaseAggregator,剔除重复的IP_browser,只发送新字段IP(数量由后续的Count统计)

/**
 * Trident aggregator that collapses the {@code IP_browser} tuples it receives
 * into the distinct IPs they start with and emits each of those IPs once.
 *
 * <p>The aggregation state is a map keyed by IP; the value is always 1 and is
 * never read, so the map only serves to remove duplicates.
 */
public static class Aggregate extends BaseAggregator<Map<String, Integer>> {

    /**
     * @return a new, empty state map
     */
    @Override
    public Map<String, Integer> init(Object o, TridentCollector tridentCollector) {
        Map<String, Integer> seenIps = new HashMap<String, Integer>();
        return seenIps;
    }

    /**
     * Records the IP part (the text before the first underscore) of the tuple's
     * {@code IP_browser} field in the state map.
     */
    @Override
    public void aggregate(Map<String, Integer> map, TridentTuple tridentTuple, TridentCollector tridentCollector) {
        String ip = tridentTuple.getStringByField("IP_browser").split("_")[0];
        map.put(ip, 1);
    }

    /**
     * Emits one single-value tuple per IP recorded in the state map.
     */
    @Override
    public void complete(Map<String, Integer> map, TridentCollector tridentCollector) {
        for (String ip : map.keySet()) {
            tridentCollector.emit(new Values(ip));
        }
    }
}

创建TridentTopology,提供用于构建Trident实时计算程序的一些接口
创建一个TridentState对象,通过newStream方法从指定的Spout创建一个新的数据输入流
日志拆分合并新字段->聚合去重复->count统计数量
创建DRPC流,函数命名为browserFunction

/**
 * Builds the Trident topology.
 *
 * <p>The processing stream reads log lines from {@code MyBatchSpout}, maps each
 * line to an {@code IP_browser} value with {@code Split}, groups by that value
 * and reduces it to an {@code IP} with {@code Aggregate}, then groups by
 * {@code IP} and keeps a persistent {@code count} in an in-memory map state.
 *
 * <p>The DRPC stream named {@code browserFunction} looks up the request
 * argument in that state and drops requests for which no count is stored.
 *
 * @param drpc the DRPC instance the {@code browserFunction} stream is bound to
 * @return the assembled topology
 */
public static StormTopology buildTopology(LocalDRPC drpc) {
    Fields lineField = new Fields("line");
    Fields ipBrowserField = new Fields("IP_browser");
    Fields ipField = new Fields("IP");
    Fields countField = new Fields("count");

    TridentTopology trident = new TridentTopology();
    MyBatchSpout logSpout = new MyBatchSpout(lineField);

    // Processing stream: line -> IP_browser -> IP -> persistent count per IP.
    TridentState browserCounts = trident.newStream("spout", logSpout)
            .parallelismHint(4)
            .each(lineField, new Split(), ipBrowserField)
            .parallelismHint(4)
            .groupBy(ipBrowserField)
            .partitionAggregate(ipBrowserField, new Aggregate(), ipField)
            .toStream()
            .parallelismHint(4)
            .groupBy(ipField)
            .persistentAggregate(new MemoryMapState.Factory(), new Count(), countField);

    // Query stream: look up the DRPC argument in the state built above.
    trident.newDRPCStream("browserFunction", drpc)
            .stateQuery(browserCounts, new Fields("args"), new MapGet(), countField)
            .each(countField, new FilterNull());

    return trident.build();
}

本地模式运行DRPC

// Local-mode driver fragment: submits the topology to an in-process cluster and
// then polls the "browserFunction" DRPC function. The enclosing method and the
// closing brace of the if-block are not part of this snippet.
Config conf = new Config();
        conf.setMaxSpoutPending(20);
        if (args.length == 0) {
            LocalDRPC drpc = new LocalDRPC();
            // A LocalDRPC object simulates a DRPC server inside this process (similar to how LocalCluster simulates a Storm cluster in-process)
            LocalCluster cluster = new LocalCluster();
            // A LocalCluster object runs the topology in local mode
            cluster.submitTopology("wordCounter", conf, buildTopology(drpc));
            // Query the DRPC function 100 times for a fixed IP, sleeping one second
            // between requests, and print each result.
            for (int i = 0; i < 100; i++) {
                System.out.println("DRPC RESULT: " + drpc.execute("browserFunction", "114.112.141.6"));
                Thread.sleep(1000);
            }

运行结果

这里写图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值