logparser使用:解析nginx-access.log文件

1. java-maven加入依赖

<dependency>
    <groupId>nl.basjes.parse.httpdlog</groupId>
    <artifactId>httpdlog-parser</artifactId>
    <version>5.7</version>
</dependency>

2. 定义log日志格式

2.1 理解nginx的access.log文件
下面是本文使用的 nginx access.log 日志格式(在 nginx.conf 自带的 main 格式基础上增加了 $msec、$request_time、$http_host、$request_body 字段)
#log_format  main  '$remote_addr - $remote_user [$time_local] [$msec]
#					[$request_time] [$http_host] "$request" '
#                  '$status $body_bytes_sent $request_body "$http_referer" '
#                  '"$http_user_agent" "$http_x_forwarded_for"';

***释义

$remote_addr 远程访问地址
$remote_user 远程访问用户
$time_local 本地时间(带时区)
$msec 写入日志时的 Unix 时间戳(单位为秒,精确到毫秒)
$request_time 请求处理耗时(单位为秒,精确到毫秒)
$http_host 请求域名
$request 访问的请求
$status 状态
$body_bytes_sent 表示请求服务器给客户端body体的大小
$request_body 请求体
$http_referer 记录请求是从哪个页面跳转过来的
$http_user_agent 记录客户浏览器的相关信息
$http_x_forwarded_for 请求转发过来的地址
$upstream_response_time 从nginx与上游(后端)服务器建立连接到接收完响应数据所花费的时间(单位为秒,精确到毫秒)
2.2 自定义logparser识别access.log的格式
%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookie}i\" \"%{Addr}i\"

详细查看:https://httpd.apache.org/docs/current/mod/mod_log_config.html

3. logparser官网使用案例

https://github.com/nielsbasjes/logparser
3.1 创建一个日志记录对应的Java Bean
package com.itcast.common.entry;

/*
 * Apache HTTPD & NGINX Access log parsing made easy
 * Copyright (C) 2011-2021 Niels Basjes
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import nl.basjes.parse.core.Field;

import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet;

/**
 * Record bean that collects the values extracted from one access-log line.
 *
 * <p>Every setter is annotated with {@link Field}, naming the field path(s) it
 * accepts. All received values are stored in a single name-to-value map which
 * {@link #toString()} renders sorted by key. Call {@link #clear()} before
 * reusing an instance for another log line.
 */
public class NginxLogRecord {

    /** Collected values, keyed by field name. */
    private final Map<String, String> results = new HashMap<>(32);

    /**
     * Stores a value for any path matching the query-string wildcard.
     *
     * @param name  the name under which the value is stored
     * @param value the extracted value
     */
    @Field("STRING:request.firstline.uri.query.*")
    public void setQueryDeepMany(final String name, final String value) {
        results.put(name, value);
    }

    /**
     * Stores the value of the {@code img} query-string parameter.
     *
     * @param name  the name under which the value is stored
     * @param value the extracted value
     */
    @Field("STRING:request.firstline.uri.query.img")
    public void setQueryImg(final String name, final String value) {
        results.put(name, value);
    }

    /**
     * Stores the client host under the fixed key
     * {@code "IP:connection.client.host"}.
     *
     * @param value the extracted client host value
     */
    @Field("IP:connection.client.host")
    public void setIP(final String value) {
        results.put("IP:connection.client.host", value);
    }

    /**
     * Generic setter shared by all the field paths listed in the annotation.
     *
     * @param name  the name under which the value is stored
     * @param value the extracted value
     */
    @Field({
            "HTTP.QUERYSTRING:request.firstline.uri.query",
            "NUMBER:connection.client.logname",
            "STRING:connection.client.user",
            "TIME.STAMP:request.receive.time",
            "HTTP.URI:request.firstline.uri",
            "BYTESCLF:response.body.bytes",
            "HTTP.URI:request.referer",
            "HTTP.USERAGENT:request.user-agent",
            "TIME.DAY:request.receive.time.day",
            "TIME.HOUR:request.receive.time.hour",
            "TIME.MONTHNAME:request.receive.time.monthname"
    })
    public void setValue(final String name, final String value) {
        results.put(name, value);
    }

    /**
     * Renders all collected values, one {@code key = value} pair per line,
     * ordered by key.
     *
     * @return the formatted record; an empty string when nothing was collected
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        // Copy the keys into a TreeSet so the output order is stable (sorted).
        TreeSet<String> keys = new TreeSet<>(results.keySet());
        for (String key : keys) {
            sb.append(key).append(" = ").append(results.get(key)).append('\n');
        }

        return sb.toString();
    }

    /** Removes every collected value so the instance can be reused. */
    public void clear() {
        results.clear();
    }
}

3.2 创建一个测试类
package com.itcast.common.logparser;

/*
 * Apache HTTPD & NGINX Access log parsing made easy
 * Copyright (C) 2011-2021 Niels Basjes
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import com.itcast.common.entry.NginxLogRecord;
import nl.basjes.parse.core.Parser;
import nl.basjes.parse.core.exceptions.DissectionFailure;
import nl.basjes.parse.core.exceptions.InvalidDissectorException;
import nl.basjes.parse.core.exceptions.MissingDissectorsException;
import nl.basjes.parse.httpdlog.HttpdLoglineParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

/**
 * Small runnable demo of the logparser library: it lists every field path that
 * can be extracted for a given log format, then parses one sample log line into
 * a {@link NginxLogRecord} and prints the result.
 */
public final class LogParserTest {

    /**
     * Prints every path the parser can produce for {@code logformat}, together
     * with the casts the parser reports for that path.
     *
     * @param logformat the log format definition handed to the parser
     * @throws NoSuchMethodException      if the dummy target method cannot be looked up
     * @throws MissingDissectorsException propagated from the parser
     * @throws InvalidDissectorException  propagated from the parser
     */
    private static void printAllPossibles(String logformat) throws NoSuchMethodException, MissingDissectorsException, InvalidDissectorException {
        // To figure out what values we CAN get from this line we instantiate the parser with a dummy class
        // that does not have ANY @Field annotations.
        // Step 3.1: build the parser
        Parser<Object> dummyParser = new HttpdLoglineParser<>(Object.class, logformat);

        List<String> possiblePaths = dummyParser.getPossiblePaths();

        // If you want to call 'getCasts' then the actual parser needs to be constructed.
        // Simply calling getPossiblePaths does not build the actual parser.
        // Because we want this for all possibilities yet we are never actually going to use this instance of the parser
        // We simply give it a random method with the right signature and tell it we want all possible paths
        dummyParser.addParseTarget(String.class.getMethod("indexOf", String.class), possiblePaths);

        System.out.println("==================================");
        System.out.println("Possible output:");
        for (String path : possiblePaths) {
            // Print the path first, then the casts, separated by a spacer.
            // (Previously the logger placeholders "{}     {}" were printed literally
            // and the path and casts were glued together.)
            System.out.println(path + "     " + dummyParser.getCasts(path));
        }
        System.out.println("==================================");
    }

    /**
     * Runs the demo: prints all possible paths for the sample format, parses the
     * sample log line into a {@link NginxLogRecord} and prints the record.
     *
     * @throws InvalidDissectorException  propagated from the parser
     * @throws MissingDissectorsException propagated from the parser
     * @throws NoSuchMethodException      propagated from the parser setup
     * @throws DissectionFailure          propagated from parsing the log line
     */
    public static void run() throws InvalidDissectorException, MissingDissectorsException, NoSuchMethodException, DissectionFailure {

        // This format and logline originate from here:
        // https://stackoverflow.com/questions/20349184/java-parse-log-file
        // Step 1: define the custom log format to recognise
        String logformat = "%t %u [%D %h %{True-Client-IP}i %{UNIQUE_ID}e %r] %{Cookie}i %s \"%{User-Agent}i\" \"%{host}i\" %l %b %{Referer}i";
        // Step 2: a sample log line in that format, used for testing
        String logline = "[02/Dec/2013:14:10:30 -0000] - [52075 10.102.4.254 177.43.52.210 UpyU1gpmBAwAACfd5W0AAAAW GET /SS14-VTam-ny_019.j" +
                "pg.rendition.zoomable.jpg HTTP/1.1] hsfirstvisit=http%3A%2F%2Fwww.domain.com%2Fen-us||1372268254000; _opt_vi_3FNG8DZU=F870" +
                "DCFD-CBA4-4B6E-BB58-4605A78EE71A; __ptca=145721067.0aDxsZlIuM48.1372279055.1379945057.1379950362.9; __ptv_62vY4e=0aDxsZlIu" +
                "M48; __pti_62vY4e=0aDxsZlIuM48; __ptcz=145721067.1372279055.1.0.ptmcsr=(direct)|ptmcmd=(none)|ptmccn=(direct); __hstc=1457" +
                "21067.b86362bb7a1d257bfa2d1fb77e128a85.1372268254968.1379934256743.1379939561848.9; hubspotutk=b86362bb7a1d257bfa2d1fb77e1" +
                "28a85; USER_GROUP=julinho%3Afalse; has_js=1; WT_FPC=id=177.43.52.210-1491335248.30301337:lv=1385997780893:ss=1385997780893" +
                "; dtCookie=1F2E0E1037589799D8D503EB8CFA12A1|_default|1; RM=julinho%3A5248423ad3fe062f06c54915e6cde5cb45147977; wcid=UpyKsQ" +
                "pmBAwAABURyNoAAAAS%3A35d8227ba1e8a9a9cebaaf8d019a74777c32b4c8; Carte::KerberosLexicon_getWGSN=82ae3dcd1b956288c3c86bdbed6e" +
                "bcc0fd040e1e; UserData=Username%3AJULINHO%3AHomepage%3A1%3AReReg%3A0%3ATrialist%3A0%3ALanguage%3Aen%3ACcode%3Abr%3AForceRe" +
                "Reg%3A0; UserID=1356673%3A12345%3A1234567890%3A123%3Accode%3Abr; USER_DATA=1356673%3Ajulinho%3AJulio+Jose%3Ada+Silva%3Ajul" +
                "inho%40tecnoblu.com.br%3A0%3A1%3Aen%3Abr%3A%3AWGSN%3A1385990833.81925%3A82ae3dcd1b956288c3c86bdbed6ebcc0fd040e1e; MODE=FON" +
                "TIS; SECTION=%2Fcontent%2Fsection%2Fhome.html; edge_auth=ip%3D177.43.52.210~expires%3D1385994522~access%3D%2Fapps%2F%2A%21" +
                "%2Fbin%2F%2A%21%2Fcontent%2F%2A%21%2Fetc%2F%2A%21%2Fhome%2F%2A%21%2Flibs%2F%2A%21%2Freport%2F%2A%21%2Fsection%2F%2A%21%2Fw" +
                "gsn%2F%2A~md5%3D90e73ee10161c1afacab12c6ea30b4ef; __utma=94539802.1793276213.1372268248.1385572390.1385990581.16; __utmb=9" +
                "4539802.52.9.1385991739764; __utmc=94539802; __utmz=94539802.1372268248.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none);" +
                " WT_FPC=id=177.43.52.210-1491335248.30301337:lv=1386000374581:ss=1386000374581; dtPC=-; NSC_wtfswfs_xfcgbsn40-41=ffffffff0" +
                "96e1a1d45525d5f4f58455e445a4a423660; akamai-edge=5ac6e5b3d0bbe2ea771bb2916d8bab34ea222a6a 200 \"Mozilla/5.0 (Windows NT 6." +
                "2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36\" \"www.domain.com\" - 463952 http://ww" +
                "w.domain.com/content/report/shows/New_York/KSHK/trip/s_s_14_ny_ww/sheers.html";

        // Step 3: print every path that can be extracted for this format
        printAllPossibles(logformat);

        // Step 4: create a parser bound to our Java bean
        Parser<NginxLogRecord> parser = new HttpdLoglineParser<>(NginxLogRecord.class, logformat);
        // This call registers the same binding as the annotation form shown below.
        // When only a few fields are needed, binding them here in code is the more flexible option.
        parser.addParseTarget("setQueryDeepMany", "STRING:request.firstline.uri.query.*");
//        @Field("STRING:request.firstline.uri.query.*")
//        public void setQueryDeepMany(final String name, final String value) {
//            results.put(name, value);
//        }
        // Step 5: instantiate the Java bean
        NginxLogRecord record = new NginxLogRecord();

        System.out.println("==================================================================================");
        // Step 6: parse the log line with the parser for this format, filling the bean
        parser.parse(record, logline);
        // Step 7: print the populated bean
        System.out.println(record.toString());
        System.out.println("==================================================================================");
    }

    /**
     * Program entry point; simply delegates to {@link #run()}.
     *
     * @param args command-line arguments (unused)
     * @throws Exception anything thrown by {@link #run()}
     */
    public static void main(final String[] args) throws Exception {
        run();
    }

}

3.3 测试效果

在这里插入图片描述
在这里插入图片描述

4. 自我实现

4.1 定义一个java bean
4.2 代码实现解析
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值