[Python3]处理Nginx日志拉取接口请求的部分字段做回放

本文链接：https://blog.csdn.net/mengwuyoulin/article/details/125770336

需求：接口用例编写，构造请求入参
方案：解析Nginx日志，拉取接口URL，请求参数，请求状态
策略：
1、对重复的接口去重
2、开启多个进程并行处理（multiprocessing.Pool）
3、去除静态文件的请求
待解决：
1、去重方式调整
2、并发处理同一个文件方案待定
3、接口请求量的统计
4、接口请求method获取

Nginx日志格式如下

{"request_body":"-","server_addr":"xx.xx.xx.xx","tid":"xx-xxxx-xx.xx.x.xx-x-xx","remote_addr":"xx.xx.xx.xx","start_time":"165773339.92","end_time":"165333.920","uri":"/","reqcode":"","upstream_addr":"-","upstream_response_time":"-","sessionid":"-","status":200}

# @File    : nginx_api_analyse
# @Description:
# @Url     :
# @Author  : lyx

import ast
import re
import sys

import pandas as pd

def DrlRepeat(data):
    """Return the records of *data* with duplicate ``'path'`` values removed.

    The first record seen for each path is kept and the input order is
    preserved; later records whose path was already seen are dropped.

    Args:
        data: Iterable of dicts, each containing a hashable ``'path'`` value.

    Returns:
        A new list holding the first dict encountered for every distinct path.

    Raises:
        KeyError: If a record has no ``'path'`` key.
    """
    seen_paths = set()  # set gives O(1) membership tests instead of a list scan
    unique_records = []  # de-duplicated records, in first-seen order
    for record in data:
        path = record['path']
        if path not in seen_paths:
            seen_paths.add(path)
            unique_records.append(record)
    return unique_records

def load_log(path):
    """Parse an Nginx log file and export its de-duplicated requests to Excel.

    Every line of the file is handed to ``parse`` in a pool of four worker
    processes. Lines for which ``parse`` returns a falsy value are skipped.
    The remaining records are de-duplicated with ``DrlRepeat`` and written to
    ``<name>所有接口数据<today>.xlsx`` (relative to the current working
    directory), where ``<name>`` is the log file's base name up to its first
    dot and ``<today>`` is ``datetime.date.today()``.

    Args:
        path: Path of the log file. The base name is taken after the last
            backslash if the path contains one, otherwise after the last
            forward slash.
    """
    import datetime
    from multiprocessing import Pool

    with open(path, mode="r", encoding="utf-8", errors='ignore') as f:
        # The context manager shuts the worker processes down once all lines
        # have been handled, so the pool is never leaked.
        with Pool(4) as pool:
            # imap streams the lines to the workers in chunks and yields the
            # results in input order, so several lines are really parsed in
            # parallel (calling get() right after apply_async would wait for
            # each line before submitting the next one).
            lst = [dic for dic in pool.imap(parse, f, chunksize=256) if dic]
    result_list = DrlRepeat(lst)
    # Derive the output name from the log file's base name (text before the
    # first dot), accepting either Windows or POSIX separators.
    separator = '\\' if '\\' in path else '/'
    filename = path.split(separator)[-1].split('.')
    pd.DataFrame(result_list).to_excel('%s所有接口数据%s.xlsx' % (filename[0], datetime.date.today()))

# Requests for static assets are not API calls and are filtered out. The
# pattern is applied to the URI without its query string.
_STATIC_FILE_RE = re.compile(r'\.(?:js|css|png|txt|gif|ttf)$')


def parse(line):
    """Parse a single Nginx log line into a flat request record.

    The line is expected to be a dict literal such as
    ``{"uri": "/api", "request_body": "-", "reqcode": "", "status": 200}``.

    Args:
        line: One raw line of the log file.

    Returns:
        A dict with the keys ``request_uri`` (URI without query string),
        ``reqcode``, ``path`` (URI, plus ``_<reqcode>`` when a meaningful
        reqcode is present), ``params_get`` (query string, or ``None``),
        ``params_post`` (``request_body`` when the URI has no query string,
        otherwise ``None``) and ``status``.
        ``False`` if the line cannot be parsed, lacks a required field, or
        targets ``/``, an empty/``-`` URI, or a static file.
    """
    try:
        # NOTE(security): log lines are external input, so they are parsed as
        # a pure literal with ast.literal_eval instead of being executed with
        # eval(); a line containing arbitrary expressions is rejected.
        result = ast.literal_eval(line)

        # URL handling: prefer 'request_uri', fall back to 'uri'.
        request_uri = result['request_uri'] if 'request_uri' in result else result['uri']
        params_get = None
        params_post = None
        if '?' in request_uri:
            # Split on the first '?' only so that a query string which itself
            # contains '?' is kept intact.
            uri, _, params_get = request_uri.partition('?')
        else:
            uri = request_uri
            # NOTE: the request body is only recorded when the URI carries no
            # query string.
            params_post = result['request_body']

        # Skip the root page, placeholder URIs and static files.
        if uri in ('/', '', '-') or _STATIC_FILE_RE.search(uri):
            return False

        # A meaningful reqcode becomes part of the de-duplication key. It is
        # formatted rather than concatenated so that a numeric reqcode does
        # not make the whole line look like dirty data.
        reqcode = result.get('reqcode')
        path = uri
        if reqcode and reqcode not in ('-', '0'):
            path = f'{uri}_{reqcode}'

        return {
            'request_uri': uri,
            'reqcode': reqcode,
            'path': path,
            'params_get': params_get,
            'params_post': params_post,
            'status': result['status'],  # HTTP status code
        }
    except Exception:
        # Best-effort parsing: any malformed or incomplete line is reported
        # as dirty data instead of aborting the whole run. Unlike a bare
        # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        return False

if __name__ == '__main__':
    # The script takes the path of the Nginx log file as its first argument;
    # fail with a usage hint instead of an IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <nginx_log_path>')
    load_log(sys.argv[1])