使用python解析nginx访问日志

做性能测试时,需要按照生产环境中各接口的实际请求占比来分配压测请求比例。下面的脚本用于从 nginx 访问日志中统计各接口的请求次数及占比:

import re
import pandas as pd
import xlwt

# Pre-compiled pattern for a single access-log line, applied with
# ``obj.match`` in ``parse``. Named groups, in the order they must appear:
# ip, time (inside [...]), request (quoted), request_time, status, bytes,
# referer (quoted) and ua (quoted).
# NOTE(review): request_time sits directly after the quoted request, which
# presumably mirrors a custom nginx ``log_format`` -- confirm against the
# server configuration before reusing this on other logs.
obj = re.compile(
    r'(?P<ip>.*?)- - \[(?P<time>.*?)\] "(?P<request>.*?)" (?P<request_time>.*?) (?P<status>.*?) (?P<bytes>.*?) "(?P<referer>.*?)" "(?P<ua>.*?)"')


def load_log(path):
    """Read an access log and sort its lines into parsed and rejected.

    Args:
        path: Location of the log file to read.

    Returns:
        A ``(lst, error_lst)`` tuple. ``lst`` holds the dict returned by
        ``parse`` for every line it accepted; ``error_lst`` holds the
        stripped text of every line for which ``parse`` returned a falsy
        value.
    """
    lst = []
    error_lst = []
    # errors="replace": request lines and user agents are client-controlled,
    # so one undecodable byte must not abort the whole analysis. Such bytes
    # are substituted with U+FFFD and the line is still handed to ``parse``.
    with open(path, mode="r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            dic = parse(line)
            if dic:
                lst.append(dic)
            else:
                error_lst.append(line)

    return lst, error_lst

def NumIn(s):
    """Return True when at least one character of *s* is a digit."""
    return any(ch.isdigit() for ch in s)

def parse(line):
    """Extract time buckets and a normalised request key from one log line.

    Args:
        line: One access-log line (already stripped by the caller).

    Returns:
        ``False`` when the line does not match ``obj`` or its request field
        has fewer than two whitespace-separated tokens. Otherwise a dict with:

        * ``time``       -- the bracketed timestamp with " +0800" removed
        * ``time_min``   -- first 17 characters of ``time``
        * ``time_10min`` -- first 16 characters of ``time``
        * ``time_hour``  -- first 14 characters of ``time``
        * ``request``    -- "<method> <path prefix>"

        NOTE(review): the slice widths presumably assume nginx's
        ``dd/Mon/yyyy:HH:MM:SS`` timestamp, giving minute, ten-minute and
        hour buckets -- confirm against the log format in use.
    """
    result = obj.match(line)
    if result is None:
        return False

    timestamp = result.group("time").replace(" +0800", "")

    tokens = result.group("request").split()
    if len(tokens) < 2:
        # No URI after the method (malformed or empty request field).
        return False
    method = tokens[0]
    path = tokens[1].split("?")[0]  # drop the query string

    # Splitting "/a/b/c/d" gives ['', 'a', 'b', 'c', 'd'], so index 4 is the
    # fourth path component. When it contains a digit, cut it off so those
    # URLs collapse into one key; otherwise keep up to four components.
    # Paths shallower than that are kept whole instead of being rejected.
    segments = path.split('/')
    keep = 4 if len(segments) > 4 and NumIn(segments[4]) else 5

    return {
        'time': timestamp,
        'time_min': timestamp[:17],
        'time_10min': timestamp[:16],
        'time_hour': timestamp[:14],
        'request': method + " " + '/'.join(segments[:keep]),
    }

def _value_count_rows(series, limit=None):
    """Return ``[(value, count), ...]`` for *series*, most frequent first."""
    counts = series.value_counts()
    if limit is not None:
        counts = counts.iloc[:limit]
    # Hand back builtin ints so cell writing does not depend on numpy scalars.
    return [(value, int(count)) for value, count in counts.items()]


def _write_count_sheet(wb, title, header, rows, label=None):
    """Add a two-column (label, count) sheet to *wb*, one row per entry."""
    sheet = wb.add_sheet(title)
    sheet.write(0, 0, header)
    sheet.write(0, 1, "count")
    for row, (value, count) in enumerate(rows, start=1):
        sheet.write(row, 0, value if label is None else label(value))
        sheet.write(row, 1, count)


def analyse(lst,project):
    """Write request statistics for one project to ``nginx_log.xls``.

    Args:
        lst: Dicts with the keys ``time``, ``time_min``, ``time_10min``,
            ``time_hour`` and ``request`` (the shape ``parse`` returns).
        project: Pattern a record's ``request`` value must contain to be
            counted. It is passed to ``Series.str.contains``, which treats
            it as a regular expression by default.

    Returns:
        None. The workbook is saved as ``nginx_log.xls`` in the current
        working directory with five sheets: per-URL counts with percentages,
        then the busiest 100 seconds, 100 minutes, 100 ten-minute windows
        and 24 hours.
    """
    # Naming the columns up front keeps an empty ``lst`` from raising
    # KeyError below; it then simply yields a workbook with header rows.
    df = pd.DataFrame(
        lst, columns=['time', 'time_min', 'time_10min', 'time_hour', 'request'])
    if not df.empty:
        df = df[df['request'].str.contains(project)]
    total = df.shape[0]

    wb = xlwt.Workbook()

    sheet = wb.add_sheet("url请求次数及占比")
    headers = ("request_url", "request_type", "count", "百分比", "请求总数")
    for col, title in enumerate(headers):
        sheet.write(0, col, title)
    # The overall total appears once, beside the first data row.
    sheet.write(1, 4, total)
    for row, (request, count) in enumerate(_value_count_rows(df['request']), start=1):
        # ``request`` is "<method> <url>" as built by ``parse``.
        pieces = request.split(" ")
        sheet.write(row, 0, pieces[1])
        sheet.write(row, 1, pieces[0])
        sheet.write(row, 2, count)
        sheet.write(row, 3, "%.2f%%" % (round(float(count / total) * 100, 2)))

    _write_count_sheet(
        wb, "秒级请求数top100", "time",
        _value_count_rows(df['time'], 100))
    _write_count_sheet(
        wb, "分钟请求数top100", "time_min",
        _value_count_rows(df['time_min'], 100),
        label=lambda v: v + ':00' + "-" + v + ':59')
    # The key stops after the tens digit of the minute, so append the
    # units digit plus seconds to spell out the window's bounds.
    _write_count_sheet(
        wb, "10分钟请求数top100", "time10",
        _value_count_rows(df['time_10min'], 100),
        label=lambda v: v + '0:00' + "-" + v + '9:59')
    _write_count_sheet(
        wb, "小时级请求数", "timehour",
        _value_count_rows(df['time_hour'], 24),
        label=lambda v: v + ':00:00' + "-" + v + ':59:59')

    wb.save("nginx_log.xls")

if __name__ == '__main__':
    # Raw string: "\D" is not a valid escape sequence, so the non-raw
    # spelling triggers an invalid-escape warning on current Python versions.
    # The resulting path text is unchanged.
    # NOTE(review): the asterisks in the path and in the project pattern look
    # like redacted placeholders -- substitute the real values before running.
    lst, error_lst = load_log(path=r"D:\Desktop\****imc.log")
    analyse(lst,project='/SVC***/')

统计结果如下:

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值