Storm1.1.0<温故而知新--hdfs和storm的集成>

1.环境

apache-storm-1.1.0
Hadoop 2.8.0

使用到的依赖:

<!-- Maven dependencies for this example: Storm core plus the storm-hdfs module, both pinned to 1.1.0. -->
<dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.1.0</version>
        </dependency>
        <!-- The topology below imports HdfsSpout and TextFileReader from org.apache.storm.hdfs.spout. -->
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-hdfs</artifactId>
            <version>1.1.0</version>
        </dependency>
    </dependencies>

需求:读取hdfs上的mapreduce日志文件,统计其中INFO、WARN、DEBUG、ERROR各日志级别的条数

思路很简单,Spout端读入文件,然后Bolt端做Wordcount(先用正则表达式匹配日志级别)

2.实现

LogLevelCountTopology

用到的参数:

(此处原为运行参数的截图;程序需要4个参数,依次为 HdfsUri、SourceDir、ArchiveDir、BadFilesDir)


import neu.bolt.CountBolt;
import neu.bolt.ExtractBolt;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.hdfs.spout.HdfsSpout;
import org.apache.storm.hdfs.spout.TextFileReader;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;

import java.util.HashMap;


/**
 * Builds and runs, in a local in-process cluster, a topology that reads text
 * lines from HDFS and counts how many lines carry each log level.
 *
 * <p>Wiring: {@code hdfsSpout} -> {@code extractbolt} (shuffle grouping)
 * -> {@code countBolt} (fields grouping on {@code "level"}).
 */
public class LogLevelCountTopology {


    /**
     * Entry point. Expects exactly four arguments, in this order:
     * HDFS URI, source directory, archive directory, bad-files directory.
     * Prints a usage line and exits with status 1 otherwise.
     *
     * @param args {@code <HdfsUri SourceDir ArchiveDir BadFilesDir>}
     * @throws InterruptedException if the thread is interrupted while the topology is running;
     *                              the local cluster is still shut down before the exception propagates
     */
    public static void main(String[] args) throws InvalidTopologyException, AuthorizationException, AlreadyAliveException, InterruptedException {
        // NOTE(review): presumably selects the user identity used for HDFS access - confirm for your cluster.
        System.setProperty("HADOOP_USER_NAME", "root");
        if (args.length != 4) {
            System.out.println("Usage <HdfsUri SourceDir ArchiveDir BadFilesDir>");
            System.exit(1);
        }
        TopologyBuilder builder = new TopologyBuilder();
        HdfsSpout hdfsSpout = new HdfsSpout()
                .setReaderType("text")
                .withOutputFields(TextFileReader.defaultFields)
                .setHdfsUri(args[0])
                .setSourceDir(args[1])
                .setArchiveDir(args[2])
                .setBadFilesDir(args[3]);


        // Per-component configuration handed to ExtractBolt.
        HashMap<String, Object> hashMap = new HashMap<>();
        // Regular expression: 23 arbitrary characters followed by the captured log level.
        // It is intentionally unanchored; ExtractBolt searches with Matcher.find().
        hashMap.put(ExtractBolt.REGEX, ".{23}(INFO|DEBUG|WARN|ERROR)");
        // Name of the spout output field that ExtractBolt reads.
        hashMap.put(ExtractBolt.FIELD, "line");
        builder.setSpout("hdfsSpout", hdfsSpout, 1);
        builder.setBolt("extractbolt", new ExtractBolt(), 1)
                .addConfigurations(hashMap).shuffleGrouping("hdfsSpout");
        // Fields grouping routes every tuple with the same level to the same counting task.
        builder.setBolt("countBolt", new CountBolt(), 1)
                .fieldsGrouping("extractbolt", new Fields("level"));

        Config conf = new Config();
        conf.setDebug(true);


        conf.setMaxTaskParallelism(1);
        LocalCluster cluster = new LocalCluster();
        try {
            cluster.submitTopology("hdfsLogLevelCountTopology", conf, builder.createTopology());
            // Let the topology run for 90 seconds before tearing the cluster down.
            Thread.sleep(90000);
        } finally {
            // Always release the local cluster, even if submission fails or the sleep is interrupted.
            cluster.shutdown();
        }
    }
}

ExtractBolt


import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Bolt that applies a configurable regular expression to one string field of
 * each incoming tuple and emits the first capture group as field {@code "level"}.
 *
 * <p>Configuration is read in {@link #prepare} from the configuration map under
 * the keys {@link #REGEX} and {@link #FIELD}. Lines that do not match are
 * reported on standard error and produce no output tuple. Every input tuple is
 * acked, whether or not it matched.
 */
public class ExtractBolt implements IRichBolt {
    /** Configuration key for the regular expression; it must contain at least one capture group. */
    public static final String REGEX = "regex";
    /** Configuration key for the name of the input field that holds the log line. */
    public static final String FIELD = "field";
    String field;
    Pattern regex;
    OutputCollector collector;

    /**
     * Reads the regex and input-field name from the configuration and compiles the pattern.
     *
     * @param stormConf configuration map; must hold string values under {@link #REGEX} and {@link #FIELD}
     * @param context   topology context (unused)
     * @param collector collector used to emit and ack tuples
     * @throws IllegalArgumentException if either setting is missing or not a string,
     *                                  or if the regex has no capture group
     */
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        Object regexValue = stormConf.get(REGEX);
        Object fieldValue = stormConf.get(FIELD);
        // Fail fast with a descriptive message instead of an opaque NPE/ClassCastException.
        if (!(regexValue instanceof String)) {
            throw new IllegalArgumentException(
                    "Missing or non-string configuration '" + REGEX + "': " + regexValue);
        }
        if (!(fieldValue instanceof String)) {
            throw new IllegalArgumentException(
                    "Missing or non-string configuration '" + FIELD + "': " + fieldValue);
        }
        Pattern compiled = Pattern.compile((String) regexValue);
        // execute() reads group(1); reject patterns that could never provide it.
        if (compiled.matcher("").groupCount() < 1) {
            throw new IllegalArgumentException(
                    "Configuration '" + REGEX + "' must contain a capture group: " + regexValue);
        }
        this.collector = collector;
        this.field = (String) fieldValue;
        this.regex = compiled;
    }

    /**
     * Searches the configured field for the pattern and, on a match, emits the first
     * capture group anchored to the input tuple. The input is always acked.
     *
     * @param input tuple carrying the log line in the configured field
     */
    public void execute(Tuple input) {
        String log = input.getStringByField(field);
        if (log != null) {
            Matcher matcher = regex.matcher(log);
            if (matcher.find()) {
                String level = matcher.group(1);
                // Anchor the output to the input so downstream failures are reported back to the spout.
                collector.emit(input, new Values(level));
            } else {
                System.err.println("不包含INFO|DEBUG|ERROR|WARN 日志:" + log);
            }
        }
        collector.ack(input);
    }

    /** No resources to release. */
    public void cleanup() {
    }

    /** Declares the single output field {@code "level"}. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("level"));
    }

    /**
     * @return {@code null}; this bolt declares no component-specific configuration itself
     */
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
}

CountBolt


import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.HashMap;
import java.util.Map;


/**
 * Bolt that keeps an in-memory running count per log level and emits the
 * updated {@code (level, count)} pair for every tuple it receives.
 */
public class CountBolt extends BaseBasicBolt {
    /** Number of tuples seen so far, keyed by log level. */
    private Map<String, Integer> counts = new HashMap<>();

    /**
     * Increments the counter for the tuple's {@code "level"} field, prints the
     * new total to standard output, and emits it downstream.
     *
     * @param input     tuple with a string field named {@code "level"}
     * @param collector collector used to emit the updated count
     */
    public void execute(Tuple input, BasicOutputCollector collector) {
        final String logLevel = input.getStringByField("level");
        final Integer previous = counts.get(logLevel);
        // A level seen for the first time starts from zero.
        final int updated = (previous == null ? 0 : previous) + 1;
        counts.put(logLevel, updated);
        System.out.println(logLevel + " :  " + updated);
        collector.emit(new Values(logLevel, updated));
    }

    /** Declares the output fields {@code "level"} and {@code "count"}. */
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        final Fields outputFields = new Fields("level", "count");
        declarer.declare(outputFields);
    }
}

主函数传参示例(依次为 HdfsUri、SourceDir、ArchiveDir、BadFilesDir):hdfs://172.17.11.85:9000 /log /ArchiveDir /BadFilesDir

给一张IDE控制台的输出日志:

这里写图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值