Nginx 访问日志的 JSON 格式化及日志分析脚本

注释版

# Annotated JSON access-log format.
#
# The format is written as a sequence of adjacent quoted strings, which nginx
# concatenates into one format. The per-field notes are real configuration
# comments placed BETWEEN the strings; a "#" inside a quoted string is not a
# comment and would be written verbatim into every log line.
#
# escape=json (nginx 1.11.8+) JSON-escapes variable values, so quotes and
# backslashes in request-controlled fields (User-Agent, URI, cookies, ...)
# cannot break the JSON. Each entry is emitted as one single-line JSON object.
log_format json escape=json
    '{'
    '"time_local": "$time_iso8601", '                          # local time in ISO 8601 format
    '"msec": $msec, '                                          # time of the log write, in seconds with millisecond resolution
    '"remote_addr": "$remote_addr", '                          # client IP address
    '"remote_user": "$remote_user", '                          # user name supplied with HTTP Basic authentication, if any
    '"body_bytes_sent": $body_bytes_sent, '                    # bytes sent to the client, not counting the response header
    '"content_length": "$content_length", '                    # value of the Content-Length request header
    '"upstream_response_length": "$upstream_response_length", ' # length of the response received from the upstream server
    '"upstream_addr": "$upstream_addr", '                      # address of the upstream server that handled the request
    '"upstream_connect_time": "$upstream_connect_time", '      # time spent establishing the upstream connection
    '"bytes_sent": $bytes_sent, '                              # total bytes sent to the client
    '"request_length": $request_length, '                      # request length (request line, headers and body)
    '"connection_requests": $connection_requests, '            # number of requests made through this connection so far
    '"http_status": $status, '                                 # HTTP status code of the response
    '"schema": "$scheme", '                                    # request scheme (http or https)
    '"uri": "$uri", '                                          # normalized URI of the request, without arguments
    '"http_cookie": "$http_cookie", '                          # Cookie request header
    '"request_uri": "$request_uri", '                          # full original request URI, including arguments
    '"query_string": "$query_string", '                        # query string of the request
    '"method": "$request_method", '                            # request method (GET, POST, ...)
    '"request_time": $request_time, '                          # request processing time in seconds
    '"upstream_response_time": "$upstream_response_time", '    # time spent receiving the response from the upstream server
    '"upstream_status": "$upstream_status", '                  # HTTP status code returned by the upstream server
    '"http_host": "$http_host", '                              # Host request header exactly as sent by the client
    '"http_referrer": "$http_referer", '                       # Referer request header
    '"http_x_forwarded_for": "$http_x_forwarded_for", '        # X-Forwarded-For request header
    '"sla_appname": "$cookie_sla_appname", '                   # value of the sla_appname cookie
    '"http_user_agent": "$http_user_agent", '                  # User-Agent request header
    '"host": "$host", '                                        # host from the request line, else the Host header, else the matching server name
    '"request": "$request", '                                  # full request line (method, URI and HTTP version)
    '"server_addr": "$server_addr", '                          # address of the server that accepted the request
    '"request_id": "$http_x_request_id"'                       # X-Request-ID request header
    '}';

纯享版

# JSON access-log format; each entry is a pretty-printed, multi-line object.
#
# escape=json (nginx 1.11.8+) JSON-escapes variable values. Without it, a
# double quote or backslash in a request-controlled field (User-Agent, URI,
# Referer, cookies, ...) produces invalid JSON that log parsers then reject.
# Numeric variables are left unquoted; upstream_* values stay quoted because
# they can be empty or hold several comma-separated values.
log_format json escape=json '
{
    "time_local": "$time_iso8601",
    "msec": $msec,
    "remote_addr": "$remote_addr",
    "remote_user": "$remote_user",
    "body_bytes_sent": $body_bytes_sent,
    "content_length": "$content_length",
    "upstream_response_length": "$upstream_response_length",
    "upstream_addr": "$upstream_addr",
    "upstream_connect_time": "$upstream_connect_time",
    "bytes_sent": $bytes_sent,
    "request_length": $request_length,
    "connection_requests": $connection_requests,
    "http_status": $status,
    "schema": "$scheme",
    "uri": "$uri",
    "http_cookie": "$http_cookie",
    "request_uri": "$request_uri",
    "query_string": "$query_string",
    "method": "$request_method",
    "request_time": $request_time,
    "upstream_response_time": "$upstream_response_time",
    "upstream_status": "$upstream_status",
    "http_host": "$http_host",
    "http_referrer": "$http_referer",
    "http_x_forwarded_for": "$http_x_forwarded_for",
    "sla_appname": "$cookie_sla_appname",
    "http_user_agent": "$http_user_agent",
    "host": "$host",
    "request": "$request",
    "server_addr": "$server_addr",
    "request_id": "$http_x_request_id"
}';

Bash 日志分析脚本(慢,依赖 jq、bc、curl)

#!/bin/bash
#
# Summarise an Nginx access log whose entries are JSON objects (one object per
# entry, possibly spread over several lines).
#
# Usage:    <this script> <log-file>
# Requires: bash 4+ (associative arrays), jq, bc, curl
# Output:   total request count, per-status counts, average request time,
#           per-client-IP counts (with a location looked up on ip-api.com)
#           and per-User-Agent counts.
# Exit:     1 on a usage error, missing log file or missing dependency.

# A log file argument is mandatory.
if [ $# -eq 0 ]; then
  echo "使用方法: $0 <日志文件>" >&2
  exit 1
fi

log_file=$1

# The log file must exist and be a regular file.
if [ ! -f "$log_file" ]; then
  echo "错误: 文件 '$log_file' 不存在" >&2
  exit 1
fi

# Fail early with a clear message: without jq every entry would otherwise be
# reported as malformed, and without bc/curl the statistics would be wrong.
for tool in jq bc curl; do
  if ! command -v "$tool" > /dev/null 2>&1; then
    echo "错误: 未找到所需命令 '$tool'" >&2
    exit 1
  fi
done

total_requests=0
total_request_time=0
declare -A status_counts=()
declare -A client_ips=()
declare -A user_agents=()

# A physical line that ends with "}" (ignoring trailing blanks or a CR) is
# treated as the end of one JSON object.
readonly object_end_re='[}][[:space:]]*$'
# Only plain decimal numbers are handed to bc.
readonly number_re='^[0-9]+([.][0-9]+)?$'

json_line=""

# "|| [[ -n $line ]]" keeps a final line that has no trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  # Accumulate the physical lines of a multi-line JSON object.
  json_line+=$line

  [[ "$line" =~ $object_end_re ]] || continue

  entry=$json_line
  json_line=""

  # Skip (and report) anything jq cannot parse.
  if ! jq empty <<< "$entry" > /dev/null 2>&1; then
    echo "跳过格式不正确的行: $entry"
    continue
  fi

  total_requests=$((total_requests + 1))

  http_status=$(jq -r '.http_status // empty' <<< "$entry")
  request_time=$(jq -r '.request_time // empty' <<< "$entry")
  remote_addr=$(jq -r '.remote_addr // empty' <<< "$entry")
  http_user_agent=$(jq -r '.http_user_agent // empty' <<< "$entry")

  # The counters below read the old value with ${array[key]:-0} instead of
  # naming array[key] inside $(( )): an arithmetic context re-evaluates the
  # subscript text, which breaks on (or even executes) request-controlled
  # strings such as a User-Agent containing "]" or "$(...)".

  # Per-status-code counter.
  if [[ -n "$http_status" ]]; then
    status_counts[$http_status]=$((${status_counts[$http_status]:-0} + 1))
  fi

  # Running total of request times; non-numeric values are ignored so that a
  # single bad field cannot wipe out the accumulated total.
  if [[ "$request_time" =~ $number_re ]]; then
    total_request_time=$(bc <<< "$total_request_time + $request_time")
  fi

  # Per-client-IP counter.
  if [[ -n "$remote_addr" ]]; then
    client_ips[$remote_addr]=$((${client_ips[$remote_addr]:-0} + 1))
  fi

  # Per-User-Agent counter.
  if [[ -n "$http_user_agent" ]]; then
    user_agents[$http_user_agent]=$((${user_agents[$http_user_agent]:-0} + 1))
  fi
done < "$log_file"

if [ "$total_requests" -gt 0 ]; then
  avg_request_time=$(bc <<< "scale=3; $total_request_time / $total_requests")
else
  avg_request_time=0
fi

echo "总请求数: $total_requests"
echo "状态码统计:"

for status in "${!status_counts[@]}"; do
  echo "  $status: ${status_counts[$status]}"
done

echo "平均请求时间: $avg_request_time 秒"

echo "客户端 IP 统计:"

for ip in "${!client_ips[@]}"; do
  # Look up the location of the IP; --max-time keeps one unresponsive lookup
  # from stalling the whole report.
  response=$(curl -s --max-time 5 "http://ip-api.com/json/$ip?lang=zh-CN")

  # jq errors are silenced on purpose: an empty or non-JSON reply simply
  # leaves the fields empty, which is reported as "unknown" below.
  city=$(jq -r '.city // empty' <<< "$response" 2> /dev/null)
  region=$(jq -r '.regionName // empty' <<< "$response" 2> /dev/null)
  country=$(jq -r '.country // empty' <<< "$response" 2> /dev/null)

  # "// empty" turns a missing or null field into an empty string (never the
  # text "null"), so emptiness is what must be tested here.
  if [[ -n "$city" && -n "$region" && -n "$country" ]]; then
    location="$city $region $country"
  else
    location="未知"
  fi

  echo "  $ip: ${client_ips[$ip]} ($location)"
done

echo "用户代理统计:"

for agent in "${!user_agents[@]}"; do
  echo "  $agent: ${user_agents[$agent]}"
done

Python 3 日志分析脚本(快,依赖 requests 库)

#!/usr/bin/env python3

import sys
import json
import requests
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor, as_completed

# Maximum number of concurrent requests (used as the thread-pool size for IP lookups)
MAX_CONCURRENT_REQUESTS = 10

def parse_log_file(log_file):
    """Aggregate statistics from a JSON-formatted Nginx access log.

    Each log entry is one JSON object that may span several physical lines.
    Lines are buffered until one ends with ``}`` (ignoring trailing
    whitespace); the buffered text is then parsed as a single entry.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(total_requests, status_counts, total_request_time,
        client_ips, user_agents)`` where the ``*_counts``/``client_ips``/
        ``user_agents`` values are ``defaultdict(int)`` counters keyed by
        status code, client address and User-Agent string respectively.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    total_requests = 0
    status_counts = defaultdict(int)
    total_request_time = 0
    client_ips = defaultdict(int)
    user_agents = defaultdict(int)

    # Decode as UTF-8 regardless of the locale and replace undecodable bytes,
    # so one stray byte in a request field cannot abort the whole analysis.
    with open(log_file, 'r', encoding='utf-8', errors='replace') as file:
        buffered = []
        for line in file:
            buffered.append(line)

            # Only a line that ENDS with "}" closes an entry. Testing for "}"
            # anywhere in the line would fire on values such as a User-Agent
            # containing "}", discarding the half-read entry.
            if not line.rstrip().endswith("}"):
                continue

            text = "".join(buffered)
            buffered = []

            try:
                log_entry = json.loads(text)
            except json.JSONDecodeError:
                # Malformed entry: drop it and start over with the next line.
                continue

            # Valid JSON that is not an object carries no fields to count.
            if not isinstance(log_entry, dict):
                continue

            total_requests += 1

            http_status = log_entry.get('http_status', '')
            request_time = log_entry.get('request_time', 0)
            remote_addr = log_entry.get('remote_addr', '')
            http_user_agent = log_entry.get('http_user_agent', '')

            if http_status:
                status_counts[http_status] += 1

            if request_time:
                try:
                    total_request_time += float(request_time)
                except (TypeError, ValueError):
                    # Non-numeric request_time: the entry still counts as a
                    # request, it just contributes no time.
                    pass

            if remote_addr:
                client_ips[remote_addr] += 1

            if http_user_agent:
                user_agents[http_user_agent] += 1

    return total_requests, status_counts, total_request_time, client_ips, user_agents

def get_ip_location(ip):
    """Look up a human-readable location for an IP address via ip-api.com.

    Args:
        ip: IP address to look up; it is interpolated into the request URL.

    Returns:
        ``"<city> <regionName> <country>"`` when the reply contains all three
        fields as non-empty strings, otherwise ``'未知'``. Network errors and
        replies that are not valid JSON also yield ``'未知'``; no exception is
        propagated for those cases.
    """
    try:
        response = requests.get(f'http://ip-api.com/json/{ip}?lang=zh-CN', timeout=5)
        data = response.json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not JSON (e.g. an HTML error page).
        return '未知'

    if not isinstance(data, dict):
        return '未知'

    parts = [data.get('city'), data.get('regionName'), data.get('country')]
    # A failed lookup omits these keys and JSON null decodes to None, so test
    # for missing/empty values rather than for the literal string 'null'
    # (still rejected below for backward compatibility).
    for part in parts:
        if not isinstance(part, str) or not part or part == 'null':
            return '未知'

    return ' '.join(parts)

def fetch_ip_locations(ip_list):
    """Resolve the location of every IP in ``ip_list`` using a thread pool.

    Lookups are submitted to a pool of ``MAX_CONCURRENT_REQUESTS`` workers and
    collected as they finish.

    Args:
        ip_list: Iterable of IP address strings.

    Returns:
        A dict mapping each IP to the value returned by ``get_ip_location``,
        or to ``'未知'`` when that lookup raised an exception.
    """
    resolved = {}
    with ThreadPoolExecutor(max_workers=MAX_CONCURRENT_REQUESTS) as pool:
        # Remember which address each future belongs to.
        pending = {}
        for address in ip_list:
            pending[pool.submit(get_ip_location, address)] = address

        for finished in as_completed(pending):
            address = pending[finished]
            try:
                location = finished.result()
            except Exception:
                location = '未知'
            resolved[address] = location
    return resolved

def main():
    """Command-line entry point: analyse one log file and print a report.

    Expects exactly one argument, the log file path. Prints a usage message
    and exits with status 1 on a wrong argument count, and prints an error and
    exits with status 1 when the file does not exist. Otherwise prints the
    request total, per-status counts, the average request time, per-IP counts
    with their looked-up location, and per-User-Agent counts.
    """
    if len(sys.argv) != 2:
        print("使用方法: python3 analyze_log.py <日志文件>")
        sys.exit(1)

    log_file = sys.argv[1]

    try:
        request_total, by_status, time_total, by_ip, by_agent = parse_log_file(log_file)
    except FileNotFoundError:
        print(f"错误: 文件 '{log_file}' 不存在")
        sys.exit(1)

    # Guard against dividing by zero for an empty log.
    if request_total > 0:
        mean_time = time_total / request_total
    else:
        mean_time = 0

    print(f"总请求数: {request_total}")

    print("状态码统计:")
    for status in by_status:
        print(f"  {status}: {by_status[status]}")

    print(f"平均请求时间: {mean_time:.3f} 秒")

    print("客户端 IP 统计:")
    locations = fetch_ip_locations(list(by_ip))
    for ip in by_ip:
        where = locations.get(ip, '未知')
        print(f"  {ip}: {by_ip[ip]} ({where})")

    print("用户代理统计:")
    for agent in by_agent:
        print(f"  {agent}: {by_agent[agent]}")

if __name__ == "__main__":
    main()

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

勾魂皮卡丘

咋滴,打算白嫖啊?

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值